diff --git a/SpeechT5 b/SpeechT5 deleted file mode 160000 index 8b5ade783571e63450aaa5507444150dcb08fa94..0000000000000000000000000000000000000000 --- a/SpeechT5 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8b5ade783571e63450aaa5507444150dcb08fa94 diff --git a/SpeechT5/CODE_OF_CONDUCT.md b/SpeechT5/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000000000000000000000000000000..f9ba8cf65f3e3104dd061c178066ec8247811f33 --- /dev/null +++ b/SpeechT5/CODE_OF_CONDUCT.md @@ -0,0 +1,9 @@ +# Microsoft Open Source Code of Conduct + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). + +Resources: + +- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) +- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) +- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns diff --git a/SpeechT5/LICENSE b/SpeechT5/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..9e841e7a26e4eb057b24511e7b92d42b257a80e5 --- /dev/null +++ b/SpeechT5/LICENSE @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/SpeechT5/README.md b/SpeechT5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..aa825607e61ad8e7e9c0b47161b38df0961a0654 --- /dev/null +++ b/SpeechT5/README.md @@ -0,0 +1,267 @@ +# SpeechT5 + +Unified-modal speech-text pre-training for spoken language processing: + +> [**SpeechT5**](https://arxiv.org/abs/2110.07205) (```ACL 2022```): **SpeechT5: Unified-Modal Encoder-Decoder Pre-training for Spoken Language Processing** + +> [**Speech2C**](https://arxiv.org/abs/2203.17113) (```INTERSPEECH 2022```): **Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data** + +> [**YiTrans**](https://arxiv.org/abs/2206.05777) (```IWSLT 2022```): **The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task** + +> [**SpeechUT**](https://arxiv.org/abs/2210.03730) (```EMNLP 2022```): **SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training** + +> [**SpeechLM**](https://arxiv.org/abs/2209.15329) (```Arxiv 2022```): **SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data** + +> [**Speech2S**](https://arxiv.org/abs/2210.17027) (```ICASSP 2023```): **Joint Pre-Training with Speech and Bilingual Text for Direct Speech to Speech Translation** + +> [**Prosody-SpeechT5**](https://ieeexplore.ieee.org/document/10096530/) (```ICASSP 2023```): **Prosody-aware SpeechT5 for Expressive Neural TTS** + +> [**VATLM**](https://arxiv.org/abs/2211.11275) (```IEEE Transactions on Multimedia```): **VATLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning** + +> [**VALL-E X**](https://arxiv.org/abs/2303.03926) (```Arxiv 2023```): **Speak Foreign Languages with Your Own Voice: Cross-Lingual Neural Codec Language Modeling** + +> [**VioLA**](https://arxiv.org/abs/2305.16107) (```Arxiv 2023```): **VioLA: Unified Codec Language Models for Speech Recognition, Synthesis, and Translation** + + + + +## Update + +- May, 2023: VioLA [**Arxiv**](https://arxiv.org/abs/2305.16107). +- May, 2023: [**VATLM**](https://arxiv.org/abs/2211.11275) was accepted by IEEE Transactions on Multimedia. +- March, 2023: VALL-E X [**Arxiv**](https://arxiv.org/abs/2303.03926) and [**Demo**](https://aka.ms/vallex). +- February, 2023: [**Speech2S**](https://arxiv.org/abs/2210.17027) and [**Prosody-SpeechT5**](https://arxiv.org/abs/2211.11275) were accepted by ICASSP 2023. +- [HuggingFace Integration] February, 2023: [**SpeechT5**](https://aclanthology.org/2022.acl-long.393/) models are on [**HuggingFace**](https://huggingface.co/blog/speecht5). +- [Model Release] November, 2022: [**VATLM**](https://github.com/microsoft/SpeechT5/tree/main/VATLM) models are released. +- November, 2022: VATLM [**Arxiv**](https://arxiv.org/abs/2211.11275). +- November, 2022: Speech2S [**Arxiv**](https://arxiv.org/abs/2210.17027). +- [Model Release] October, 2022: [**SpeechUT**](https://github.com/microsoft/SpeechT5/tree/main/SpeechUT) models are released. +- October, 2022: [**SpeechUT**](https://arxiv.org/abs/2210.03730) was accepted by EMNLP 2022. +- [Model Release] October, 2022: [**SpeechLM**](https://github.com/microsoft/SpeechT5/tree/main/SpeechLM) models are released. 
+- September, 2022: SpeechLM [**Arxiv**](https://arxiv.org/abs/2209.15329). +- [Evaluation] June, 2022: The end-to-end ST system [**YiTrans**](https://arxiv.org/abs/2206.05777) achieved top results on [**IWSLT 2022**](https://iwslt.org/2022/offline) shared tasks. +- June, 2022: [**Speech2C**](https://www.isca-speech.org/archive/interspeech_2022/ao22_interspeech.html) was accepted by InterSpeech 2022. +- [Model Release] May, 2022: [**Speech2C**](https://github.com/microsoft/SpeechT5/tree/main/Speech2C) models are released. +- [Model Release] April, 2022: [**SpeechT5**](https://github.com/microsoft/SpeechT5/tree/main/SpeechT5) models are released. +- March, 2022: Speech2C [**Arxiv**](https://arxiv.org/abs/2203.17113). +- February, 2022: [**SpeechT5**](https://aclanthology.org/2022.acl-long.393/) was accepted by ACL 2022. +- October, 2021: SpeechT5 [**Arxiv**](https://arxiv.org/abs/2110.07205). + + +## Pre-Trained Models + + +| Model | Pre-training Dataset | Fine-tuning Dataset | Model | +| :------: | :----------------------------------------------: | :-----------------: | :-----: | +| SpeechT5 Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [LibriSpeech LM Dataset](https://www.openslr.org/11/) | - | [HuggingFace](https://huggingface.co/ajyy/SpeechT5/resolve/main/speecht5_base.pt)
[Google Drive](https://drive.google.com/file/d/1Sq00uZ1pw6Z4OUaqhOWzQEJxIVWgAO5U/view?usp=sharing) | +| SpeechT5 Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [LibriSpeech LM Dataset](https://www.openslr.org/11/) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [HuggingFace](https://huggingface.co/ajyy/SpeechT5/resolve/main/speecht5_base_asr.pt)
[Google Drive](https://drive.google.com/file/d/1qLKJ81JPWOGf1MHfjSmgtZyqqTqgI6kT/view?usp=sharing) | +| SpeechT5 Large | [60k hrs Libri-Light](https://github.com/facebookresearch/libri-light) + [LibriSpeech LM Dataset](https://www.openslr.org/11/) | - | [Google Drive](https://drive.google.com/file/d/1M79b1jetSPOVxWVMIX-y0URvDjNskZKp/view?usp=sharing) | +| Speech2C | [960 hrs LibriSpeech](http://www.openslr.org/12) | - | [Google Drive](https://drive.google.com/file/d/1nGZ0LWEwlLq2pz7o805YALsMr9irV0Za/view?usp=sharing) | +| Speech2C | [960 hrs LibriSpeech](http://www.openslr.org/12) | [10 hrs LibriSpeech](http://www.openslr.org/12) | [Google Drive](https://drive.google.com/file/d/1nWSAc-33LmcDQHzH8IjXVJsuk0JZTWgN/view?usp=sharing) | +| Speech2C | [960 hrs LibriSpeech](http://www.openslr.org/12) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [Google Drive](https://drive.google.com/file/d/1LwbQ5Y3tKZoK3s1ayLQgsfLTFnmkKNZs/view?usp=sharing) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | - | [Google drive](https://drive.google.com/file/d/1iJvhSGghNrMT-wAY1nwVu2YaYuTy1pxx/view?usp=sharing) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [Google drive](https://drive.google.com/file/d/1mH3N7iKMWYk3rSBJErQPYf3x5ugqDq5x/view?usp=sharing) | +| SpeechLM-H Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | - | [Google drive](https://drive.google.com/file/d/1eblW8U8f9t-NTuCNRrNHwr-8BeLAUAmQ/view?usp=sharing) | +| SpeechLM-H Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [Google drive](https://drive.google.com/file/d/1vXyO5DolbiWiTYZ6pkkKQsu2wJetaPlv/view?usp=sharing) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [En-De CoVoST-2](https://github.com/facebookresearch/covost) | [Azure Storage](https://valle.blob.core.windows.net/share/speechlm/finetune_covost/checkpoint_ende.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [En-Ca CoVoST-2](https://github.com/facebookresearch/covost) | [Azure Storage](https://valle.blob.core.windows.net/share/speechlm/finetune_covost/checkpoint_enca.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [En-Ar CoVoST-2](https://github.com/facebookresearch/covost) | [Azure Storage](https://valle.blob.core.windows.net/share/speechlm/finetune_covost/checkpoint_enar.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [En-Tr CoVoST-2](https://github.com/facebookresearch/covost) | [Azure 
Storage](https://valle.blob.core.windows.net/share/speechlm/finetune_covost/checkpoint_entr.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | - | [Google drive](https://drive.google.com/file/d/1QjLIgTJKIylVIp5hUkfSjGPtz8Xo7Lky/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | [960 hrs LibriSpeech](http://www.openslr.org/12) | [Google drive](https://drive.google.com/file/d/1YZQDVv096o8Opt0RBnkRiZXYPRDqKZnP/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | [En-De CoVoST-2](https://github.com/facebookresearch/covost) | [Google drive](https://drive.google.com/file/d/1qYygNWSc11TQbBI1OzC4ChlR-dNh8t9S/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | [En-Ca CoVoST-2](https://github.com/facebookresearch/covost) | [Google drive](https://drive.google.com/file/d/162U88mwso2aVfzzPkEM2nP_vwTpcb57T/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | [En-Ar CoVoST-2](https://github.com/facebookresearch/covost) | [Google drive](https://drive.google.com/file/d/1lbTSRXewEeb2t45URunD6EiJcbniyjWW/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | [En-Tr CoVoST-2](https://github.com/facebookresearch/covost) | [Google drive](https://drive.google.com/file/d/1Er4I_jHS175pQQph223yKtiiLQ378VvH/view?usp=sharing) | +| SpeechUT Base (ASR) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4asr_32gpu_1accum/checkpoint_298_400000.pt?sv=2020-04-08&st=2023-03-08T01%3A39%3A48Z&se=2024-03-09T01%3A39%3A00Z&sr=b&sp=r&sig=l3gJS1D%2BJfLfNfS3z33WjmSMGrOBJ63CvqGGewC6WeU%3D)| +| SpeechUT Base (ASR) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/speechut_base_asr100h_checkpoint_best.pt?sv=2020-04-08&st=2023-03-08T01%3A41%3A22Z&se=2024-03-09T01%3A41%3A00Z&sr=b&sp=r&sig=%2B9lpGrqtZXa%2F6n1uZT%2Biey54ky31bYKSJytgfnBbbN4%3D)| +| SpeechUT Large (ASR) | [60k hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/large_speechut4asr_32gpu_4accum/checkpoint_22_400k.pt?sv=2020-04-08&st=2023-03-08T01%3A42%3A10Z&se=2024-03-09T01%3A42%3A00Z&sr=b&sp=r&sig=TZNcsHQAqapyj%2BAvpHtl749kZy9flTkWm8P5L4W26qs%3D)| +| SpeechUT Large (ASR) | [60k hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [960 hrs LibriSpeech](http://www.openslr.org/12) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/speechut_large_asr960h_checkpoint_best.pt?sv=2020-04-08&st=2023-03-08T01%3A43%3A02Z&se=2024-03-09T01%3A43%3A00Z&sr=b&sp=r&sig=PmO%2BgSAMXRgMC7GfpS4c%2BrDPsfJGekqUzD5AJm7RrYU%3D)| +| SpeechUT Base (En-De) 
| [960 hrs LibriSpeech](http://www.openslr.org/12) + [408 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [4.6M Text](https://www.statmt.org/wmt16/) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4ende_32gpu_1accum/checkpoint_217_400000.pt?sv=2020-04-08&st=2023-03-08T01%3A43%3A47Z&se=2024-03-09T01%3A43%3A00Z&sr=b&sp=r&sig=XDEesMdGQ027j7YtpSql1kZtwgfv39gruOuWYlKlJ7w%3D)| +| SpeechUT Base (En-De) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [408 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [4.6M Text](https://www.statmt.org/wmt16/) | [En-De MuST-C v1](https://ict.fbk.eu/must-c/) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4ende_32gpu_1accum/fineutne_ende_checkpoint_avg.pt?sv=2020-04-08&st=2023-03-08T01%3A44%3A15Z&se=2024-03-09T01%3A44%3A00Z&sr=b&sp=r&sig=8dcenahRg46EJdwiHUalVBJgKra6JoSN7tUxdLAwzOM%3D)| +| SpeechUT Base (En-Es) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [504 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [15M Text](https://www.statmt.org/wmt13/) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4enes_32gpu_1accum/checkpoint_204_400000.pt?sv=2020-04-08&st=2023-03-08T01%3A48%3A16Z&se=2024-03-09T01%3A48%3A00Z&sr=b&sp=r&sig=hWoCM0y0SGZTD4CznC%2F5CejFczkqDYTOaISmlhCAYAU%3D)| +| SpeechUT Base (En-Es) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [504 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [15M Text](https://www.statmt.org/wmt13/) | [En-Es MuST-C v1](https://ict.fbk.eu/must-c/) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4enes_32gpu_1accum/fineutne_enes_checkpoint_avg.pt?sv=2020-04-08&st=2023-03-08T01%3A48%3A41Z&se=2024-03-09T01%3A48%3A00Z&sr=b&sp=r&sig=KGfzgKfKkDVQI0JxxnS%2BsYdBQzhUqFLQAVYG0OSGBtk%3D)| +| SpeechUT Base (En-Fr) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [492 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [40M Text](https://www.statmt.org/wmt14/) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4enfr_32gpu_1accum/checkpoint_297_600000.pt?sv=2020-04-08&st=2023-03-08T01%3A49%3A09Z&se=2024-03-09T01%3A49%3A00Z&sr=b&sp=r&sig=1eqpXMLCjWpfyd7AiOHGzfk%2B8ZYqWwVWdHk1GqXgoeg%3D)| +| SpeechUT Base (En-Fr) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [492 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [40M Text](https://www.statmt.org/wmt14/) | [En-Fr MuST-C v1](https://ict.fbk.eu/must-c/) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4enfr_32gpu_1accum/fineutne_enfr_checkpoint.pt?sv=2020-04-08&st=2023-03-08T01%3A49%3A34Z&se=2024-03-09T01%3A49%3A00Z&sr=b&sp=r&sig=i3jMAqvL1Vp7DRjACAbrdoQKhlv2Cmi40%2F14SJ6%2BoiU%3D)| + + + +## SpeechT5 Introduction + +Motivated by the success of T5 (Text-To-Text Transfer Transformer) in pre-trained natural language processing models, we propose a unified-modal SpeechT5 framework that explores the encoder-decoder pre-training for self-supervised speech/text representation learning. +The SpeechT5 framework consists of a shared encoder-decoder network and six modal-specific (speech/text) pre/post-nets. +After preprocessing the input speech/text through the pre-nets, the shared encoder-decoder network models the sequence-to-sequence transformation, and then the post-nets generate the output in the speech/text modality based on the output of the decoder. 
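To make the description above concrete, here is a minimal, self-contained PyTorch sketch of the routing idea: a modality-specific pre-net maps speech features or text tokens into a shared hidden space, a shared encoder-decoder models the sequence-to-sequence transformation, and a modality-specific post-net produces output in the target modality. This is only an illustration under assumed names and sizes (`ToyUnifiedEncoderDecoder`, `nn.Transformer`, toy linear/embedding pre-nets); it is not the released fairseq implementation in this repository.

```python
import torch
import torch.nn as nn


class ToyUnifiedEncoderDecoder(nn.Module):
    """Illustrative sketch of the SpeechT5-style data flow (not the real model)."""

    def __init__(self, d_model=768, vocab_size=10000, n_mels=80):
        super().__init__()
        # Modality-specific pre-nets: map speech features / text tokens
        # into the shared hidden space.
        self.speech_prenet = nn.Linear(n_mels, d_model)
        self.text_prenet = nn.Embedding(vocab_size, d_model)
        # Shared encoder-decoder backbone used by both modalities.
        # (Positional encodings and attention masks are omitted for brevity.)
        self.backbone = nn.Transformer(
            d_model=d_model,
            num_encoder_layers=2,
            num_decoder_layers=2,
            batch_first=True,
        )
        # Modality-specific post-nets: turn decoder states back into
        # speech features or text logits.
        self.speech_postnet = nn.Linear(d_model, n_mels)
        self.text_postnet = nn.Linear(d_model, vocab_size)

    def forward(self, src, tgt, src_modality="speech", tgt_modality="text"):
        enc_in = self.speech_prenet(src) if src_modality == "speech" else self.text_prenet(src)
        dec_in = self.speech_prenet(tgt) if tgt_modality == "speech" else self.text_prenet(tgt)
        dec_out = self.backbone(enc_in, dec_in)
        if tgt_modality == "speech":
            return self.speech_postnet(dec_out)  # e.g. TTS / voice conversion
        return self.text_postnet(dec_out)        # e.g. ASR / speech translation


# ASR-style routing (speech in, text out) with dummy tensors.
model = ToyUnifiedEncoderDecoder()
speech = torch.randn(2, 100, 80)          # (batch, frames, mel bins)
text = torch.randint(0, 10000, (2, 20))   # (batch, target tokens)
logits = model(speech, text, src_modality="speech", tgt_modality="text")
print(logits.shape)  # torch.Size([2, 20, 10000])
```

In the released model the speech pre/post-nets are of course richer than the toy linear layers above (e.g. a convolutional feature extractor on the encoder side and mel-spectrogram prediction on the decoder side), but the routing through one shared encoder-decoder is the same.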
+ +Leveraging large-scale unlabeled speech and text data, we pre-train SpeechT5 to learn a unified-modal representation, hoping to improve the modeling capability for both speech and text. +To align the textual and speech information into this unified semantic space, we propose a cross-modal vector quantization approach that randomly mixes up speech/text states with latent units as the interface between encoder and decoder. +Extensive evaluations show the superiority of the proposed SpeechT5 framework on a wide variety of spoken language processing tasks, including automatic speech recognition, speech synthesis, speech translation, voice conversion, speech enhancement, and speaker identification. + + + +## SpeechT5 Downstream Task Performance + +We evaluate our models on typical spoken language processing tasks, including automatic speech recognition, text to speech, speech to text translation, voice conversion, speech enhancement, and speaker identification. + +### Automatic Speech Recognition + +Evaluation on the [LibriSpeech](http://www.openslr.org/12) + +| Model |LM | dev-clean | dev-other | test-clean | test-other | +| ------------- |------------- | ------| ----- | ----| ----| +| wav2vec2.0 Base | - | 6.1 | 13.5 | 6.1 | 13.3 | +| HuBERT Base | - | 5.5 | 13.1 | 5.8 | 13.3 | +| Baseline (w/o CTC) | - | 5.8 | 12.3 | 6.2 | 12.3 | +| Baseline | - | 4.9 | 11.7 | 5.0 | 11.9 | +| SpeechT5 (w/o CTC) | - | 5.4 | 10.7 | 5.8 | 10.7 | +| **SpeechT5** | - | **4.3** | **10.3** | **4.4** | **10.4** | +| DiscreteBERT | 4-gram | 4.0 |10.9 |4.5 |12.1 | +| wav2vec 2.0 Base | 4-gram | 2.7 |7.9 |3.4 |8.0 | +| HuBERT Base | 4-gram | 2.7 |7.8 |3.4 |8.1 | +| wav2vec 2.0 Base | Transf. | 2.2 |6.3 |2.6 |6.3 | +| Baseline | Transf. | 2.3 |6.3 |2.5 |6.3 | +| **SpeechT5** | Transf.
| **2.1** |**5.5** |**2.4** |**5.8** | + +### Text-to-Speech + +Evaluation on the [LibriTTS](http://www.openslr.org/60/) + + +| Model | Naturalness | MOS | CMOS | +| ------------- |------------ | ------ | ----- | +| Ground Truth | - | 3.87 | - | +| Baseline | 2.76 | 3.56 | 0 | +| **SpeechT5** | 2.91 | **3.65** | **+0.290** | + +### Speech Translation + +Evaluation on the [MUST-C v1](https://ict.fbk.eu/must-c/) + +| Model | EN-DE | EN-FR | +| ------------- |------------ | ------ | +| Fairseq ST | 22.70 | 32.90 | +| ESPnet ST | 22.91 | 32.69 | +| Adapter Tuning| 24.63 | 34.98 | +| Baseline | 23.43 | 33.76 | +| SpeechT5 (w/o initializing decoder) | 24.44 | 34.5 | +| **SpeechT5** | **25.18** | **35.30** | + + +### Voice Conversion + +Evaluation on the [CMU Arctic](http://www.festvox.org/cmu_arctic/) + + +| Model | WER | WER | MCD | MCD | +| ------------- | ------ | ----- | ---- | ----| +| | bdl to slt | clb to slt | bdl to slt | clb to slt | +| VTN w/ ASR | 11.1 | 10.9 | 6.5 | 6.11 | +| VTN w/ TTS | 7.6 | 9.1 | 6.33 | 13.3 | +| Many-to-many VTN | - | - | 6.13 | 5.97 | +| Baseline | 21.5 | 10.8 | 6.26 | 6.16 | +| **SpeechT5** | **7.8** | **6.4** | **5.93**| **5.87** | + + + +### Speech Enhancement + +Evaluation on the [WSJ0 Hipster AmbientMixtures (WHAM!)](http://wham.whisper.ai/) + + +| Model | WER | +| ------------- |------------ | +| Ground Truth Speech | 3.2 | +| Noisy Speech | 76.1 | +| Baseline | 10.9 | +| **SpeechT5** | **8.9** | + + +### Speaker Identification + +Evaluation on the [VoxCeleb1](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1.html) + +| Model | Acc | +| ------------- |------------ | +| SUPERB, wav2vec 2.0 Base | 75.18% | +| SUPERB, HuBERT Base | 81.42% | +| SUPERB, HuBERT Large | 90.33% | +| SpeechNet, single task | 86.00% | +| SpeechNet, multi-task with TTS | 87.90% | +| Thin ResNet-34 | 89.00% | +| Baseline | 91.92% | +| **SpeechT5** | **96.49%** | + +## License + +This project is licensed under the license found in the LICENSE file in the root directory of this source tree. +Portions of the source code are based on the [FAIRSEQ](https://github.com/pytorch/fairseq) and [ESPnet](https://github.com/espnet/espnet) projects. 
+ +[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct) + +### Reference + +If you find our work is useful in your research, please cite the following paper: + +```bibtex +@article{Ao2021SpeechT5, + title = {SpeechT5: Unified-Modal Encoder-Decoder Pre-training for Spoken Language Processing}, + author = {Junyi Ao and Rui Wang and Long Zhou and Chengyi Wang and Shuo Ren and Yu Wu and Shujie Liu and Tom Ko and Qing Li and Yu Zhang and Zhihua Wei and Yao Qian and Jinyu Li and Furu Wei}, + eprint={2110.07205}, + archivePrefix={arXiv}, + primaryClass={eess.AS}, + year={2021} +} +``` + +```bibtex +@article{Ao2022Speech2C, + title = {Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data}, + author = {Junyi Ao and Ziqiang Zhang and Long Zhou and Shujie Liu and Haizhou Li and Tom Ko and Lirong Dai and Jinyu Li and Yao Qian and Furu Wei}, + eprint={2203.17113}, + archivePrefix={arXiv}, + primaryClass={cs.SD}, + year={2022} +} +``` + +```bibtex +@article{Zhang2022Yitrans, + title = {The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task}, + author = {Zhang, Ziqiang and Ao, Junyi and Zhou, Long and Liu, Shujie and Wei, Furu and Li, Jinyu}, + eprint={2206.05777}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + year={2022} +} +``` + +```bibtex +@article{zhang2022speechut, + title = {SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training}, + author = {Zhang, Ziqiang and Zhou, Long and Ao, Junyi and Liu, Shujie and Dai, Lirong and Li, Jinyu and Wei, Furu}, + eprint={2210.03730}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + year={2022} +} +``` + +```bibtex +@article{zhang2022speechlm, + title = {SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data}, + author = {Zhang, Ziqiang and Chen, Sanyuan and Zhou, Long and Wu, Yu and Ren, Shuo and Liu, Shujie and Yao, Zhuoyuan and Gong, Xun and Dai, Lirong and Li, Jinyu and Wei, Furu}, + eprint={2209.15329}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + year={2022} +} +``` + +### Contact Information + +For help or issues using SpeechT5 models, please submit a GitHub issue. + +For other communications related to SpeechT5, please contact Long Zhou (`lozhou@microsoft.com`). diff --git a/SpeechT5/SECURITY.md b/SpeechT5/SECURITY.md new file mode 100644 index 0000000000000000000000000000000000000000..869fdfe2b246991a053fab9cfec1bed3ab532ab1 --- /dev/null +++ b/SpeechT5/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 
+ +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). + + diff --git a/SpeechT5/Speech2C/README.md b/SpeechT5/Speech2C/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9e568918c7ee624ba9bfe8c39f810a72af69f3f2 --- /dev/null +++ b/SpeechT5/Speech2C/README.md @@ -0,0 +1,145 @@ +# Speech2C + +> [**Speech2C**](https://arxiv.org/abs/2203.17113) (```INTERSPEECH 2022```): **Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data** + +## Pre-Trained and Fine-tuned Models + +| Model | Pre-training Dataset | Fine-tuning Dataset | Model | +| :------: | :----------------------------------------------: | :-----------------: | :-----: | +| Speech2C | [960 hrs LibriSpeech](http://www.openslr.org/12) | - | [Google Drive](https://drive.google.com/file/d/1nGZ0LWEwlLq2pz7o805YALsMr9irV0Za/view?usp=sharing) | +| Speech2C | [960 hrs LibriSpeech](http://www.openslr.org/12) | [10 hrs LibriSpeech](http://www.openslr.org/12) | [Google Drive](https://drive.google.com/file/d/1nWSAc-33LmcDQHzH8IjXVJsuk0JZTWgN/view?usp=sharing) | +| Speech2C | [960 hrs LibriSpeech](http://www.openslr.org/12) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [Google Drive](https://drive.google.com/file/d/1LwbQ5Y3tKZoK3s1ayLQgsfLTFnmkKNZs/view?usp=sharing) | + + +## Language Model and Vocabulary +| Model | Dataset | Model | Vocabulary | +| :------: | :------: | :---: | :--------: | +| LM | [LibriSpeech LM Dataset](https://www.openslr.org/11/) | [Model](https://drive.google.com/file/d/1UDCcNJT1DlquSRw0iRAXH6GHlf6zK6-8/view?usp=sharing) | [Vocabulary](https://dl.fbaipublicfiles.com/fairseq/wav2vec/dict.ltr.txt) | + +## Setup +``` +git submodule update --init Speech2C/fairseq +cd Speech2C/ +pip install --editable fairseq/ +``` + +## Data Preparation +Please follow the steps of data preparation for HuBERT in 
[here](https://github.com/facebookresearch/fairseq/tree/main/examples/hubert#data-preparation). + +## Pre-Training +``` +DATA_DIR= +LABEL_DIR= +FAIRSEQ_PATH= + +python ${FAIRSEQ_PATH}/fairseq_cli/hydra_train.py \ + --config-dir speech2c/config \ + --config-name speech2c_base_librispeech \ + task.data=${DATA_DIR} task.label_dir=${LABEL_DIR} task.labels='["km"]' \ + model.label_rate=50 common.user_dir=SpeechT5/Speech2C/speech2c \ +``` + +## Finetune + +``` +DATA_DIR= +LABEL_DIR= +FAIRSEQ_PATH= +W2V_PATH= +CONFIG_NAME= + +python ${FAIRSEQ_PATH}/fairseq_cli/hydra_train.py \ + --config-dir speech2c/config \ + --config-name ${CONFIG_NAME} \ + task.data=${DATA_DIR} task.label_dir=${LABEL_DIR} \ + model.w2v_path=${W2V_PATH} common.user_dir=SpeechT5/Speech2C/speech2c \ +``` + +## Inference +Note that joint CTC and decoder inference is only supported when the batch size is 1. + +``` +FAIRSEQ_PATH= +DATA_DIR= +LABEL_DIR= +BEAM_SIZE= +CTC_WEIGHT= +TEST_SET= +CHECKPOINT_PATH= +W2V_PATH= + + +python ${FAIRSEQ_PATH}/fairseq_cli/generate.py ${DATA_DIR} \ + --label-dir ${LABEL_DIR} \ + --path ${CHECKPOINT_PATH} \ + --user-dir SpeechT5/Speech2C/speech2c \ + --model-overrides "{'w2v_path': '${W2V_PATH}'}" \ + --gen-subset ${TEST_SET} \ + --task speech2c_pretraining \ + --post-process letter \ + --add-decoder \ + --labels '["ltr"]' \ + --fine-tuning \ + --scoring wer \ + --max-len-a 0 \ + --max-len-b 620 \ + --pad-audio \ + --random-crop \ + --ctc-weight ${CTC_WEIGHT} \ + --max-tokens 8000000 \ + --beam ${BEAM_SIZE} \ + --single-target \ +``` + +## Results on Librispeech + +### Evaluation on the [LibriSpeech](http://www.openslr.org/12) 10hr subset + +| Model |LM | test-clean | test-other | +| ------------- |------------- | ----| ----| +| wav2vec2.0 Base | - | 11.1 | 17.6 | +| HuBERT Base | - | 10.1 | 16.8 | +| **Speech2C** | - | **7.8** | **13.1** | +| wav2vec 2.0 Base | 4-gram | 4.3 |9.5 | +| wav2vec 2.0 Base | Transf. |3.2 |7.8 | +| HuBERT Base | 4-gram |4.3 |9.4 | +| **Speech2C** | **Transf.** | **3.1** | **7.0** | + +### Evaluation on the [LibriSpeech](http://www.openslr.org/12) 100hr subset + +| Model |LM | test-clean | test-other | +| ------------- |------------- | ----| ----| +| wav2vec2.0 Base | - | 6.1 | 13.3 | +| wav2vec2.0 Large | - | 4.7 | 9.0 | +| HuBERT Base | - | 6.3 | 13.2 | +| SpeechT5 | - | 4.4 | 10.4 | +| Baseline | - | 5.0 | 11.9 | +| **Speech2C** | - | **4.3** |**9.0** | +| wav2vec 2.0 Base | 4-gram | 3.4 |8.0 | +| wav2vec 2.0 Base | Transf. | 2.6 | 6.3 | +| HuBERT Base | 4-gram | 3.4 |8.1 | +| SpeechT5 | Transf. | 2.4 |5.8 | +| Baseline | Transf. | 2.5 |6.3 | +| **Speech2C** | **Transf.** | **2.4** |**5.2** | + +## License + +This project is licensed under the license found in the LICENSE file in the root directory of this source tree. +Portions of the source code are based on the [FAIRSEQ](https://github.com/pytorch/fairseq). 
+ +[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct) + +## Reference + +If you find our work is useful in your research, please cite the following paper: + +```bibtex +@article{Ao2022Speech2C, + title = {Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data}, + author = {Junyi Ao and Ziqiang Zhang and Long Zhou and Shujie Liu and Haizhou Li and Tom Ko and Lirong Dai and Jinyu Li and Yao Qian and Furu Wei}, + eprint={2203.17113}, + archivePrefix={arXiv}, + primaryClass={cs.SD}, + year={2022} +} +``` diff --git a/SpeechT5/Speech2C/speech2c/__init__.py b/SpeechT5/Speech2C/speech2c/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8994f9a368ae4b2eff720fffb134e2a5b813ee1c --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/__init__.py @@ -0,0 +1 @@ +from . import data, tasks, criterions, models # noqa \ No newline at end of file diff --git a/SpeechT5/Speech2C/speech2c/config/base_100h.yaml b/SpeechT5/Speech2C/speech2c/config/base_100h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2af86af96e3719a1419a4dd49af156d4c61e9c49 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/config/base_100h.yaml @@ -0,0 +1,93 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + no_epoch_checkpoints: true + best_checkpoint_metric: dec_accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 1 + distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: speech2c_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: false # must be consistent with pre-training + labels: ["ltr"] + single_target: true + add_decoder: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 3200000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_100h + valid_subset: dev_other + +criterion: + _name: ctc_ce + zero_infinity: true + +optimization: + max_update: 80000 + lr: [0.00004] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speech2c_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + decoder_layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 25000 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2C/speech2c/config/base_10h.yaml b/SpeechT5/Speech2C/speech2c/config/base_10h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aaa4ed7a79998fc1a09480f2917e2557e8aba457 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/config/base_10h.yaml @@ -0,0 +1,104 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 5 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: dec_accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 1 + distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: speech2c_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: false # must be consistent with pre-training + labels: ["ltr"] + single_target: true + add_decoder: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 3200000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: 5 + train_subset: train_10h + valid_subset: dev_other + +criterion: + _name: ctc_ce + zero_infinity: true + +optimization: + max_update: 25000 + lr: [2e-5] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speech2c_ctc + w2v_path: ??? + apply_mask: true + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + decoder_layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 0.0 + freeze_finetune_updates: 10000 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2C/speech2c/config/speech2c_base_librispeech.yaml b/SpeechT5/Speech2C/speech2c/config/speech2c_base_librispeech.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f361375d8d11d6d3f7dc5573bbfc1e779930d52 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/config/speech2c_base_librispeech.yaml @@ -0,0 +1,100 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: true + + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 32 + distributed_port: 29671 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: speech2c_pretraining + data: ??? + label_dir: ??? + labels: ??? 
+ label_rate: ${model.label_rate} + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: false # must be consistent with extractor + add_decoder: true + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + validate_interval_updates: 10000 + +criterion: + _name: speech2c + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: speech2c + label_rate: ??? + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + decoder_dict_size: -1 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2C/speech2c/criterions/__init__.py b/SpeechT5/Speech2C/speech2c/criterions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..69fc7d7c6fa06ee16e28752119410410bf3e212f --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/criterions/__init__.py @@ -0,0 +1,10 @@ +import importlib +import os + + +for file in os.listdir(os.path.dirname(__file__)): + if file.endswith(".py") and not file.startswith("_"): + criterion_name = file[: file.find(".py")] + importlib.import_module( + "speech2c.criterions." 
+ criterion_name + ) diff --git a/SpeechT5/Speech2C/speech2c/criterions/ctc_ce.py b/SpeechT5/Speech2C/speech2c/criterions/ctc_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..39922924a1f22f6405f743cf262ca3609de59268 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/criterions/ctc_ce.py @@ -0,0 +1,404 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import math +from argparse import Namespace +from dataclasses import dataclass, field +from omegaconf import II +from typing import Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.dataclass import FairseqDataclass +from fairseq.data.data_utils import post_process +from fairseq.tasks import FairseqTask +from fairseq.logging.meters import safe_round + + +@dataclass +class CtcCeCriterionConfig(FairseqDataclass): + zero_infinity: bool = field( + default=False, + metadata={"help": "zero inf loss when source length <= target length"}, + ) + sentence_avg: bool = II("optimization.sentence_avg") + post_process: str = field( + default="letter", + metadata={ + "help": "how to post process predictions into words. can be letter, " + "wordpiece, BPE symbols, etc. 
" + "See fairseq.data.data_utils.post_process() for full list of options" + }, + ) + wer_kenlm_model: Optional[str] = field( + default=None, + metadata={ + "help": "if this is provided, use kenlm to compute wer (along with other wer_* args)" + }, + ) + wer_lexicon: Optional[str] = field( + default=None, + metadata={"help": "lexicon to use with wer_kenlm_model"}, + ) + wer_lm_weight: float = field( + default=2.0, + metadata={"help": "lm weight to use with wer_kenlm_model"}, + ) + wer_word_score: float = field( + default=-1.0, + metadata={"help": "lm word score to use with wer_kenlm_model"}, + ) + + wer_args: Optional[str] = field( + default=None, + metadata={ + "help": "DEPRECATED: tuple of (wer_kenlm_model, wer_lexicon, wer_lm_weight, wer_word_score)" + }, + ) + + dec_weight: float = field( + default=0.5, + metadata={"help": "weights for decoder CE Loss, loss will be ((1 - dec_weight) * hubert_loss + dec_weight * CE_Loss)"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + label_smoothing: float = field( + default=0.1, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + + +@register_criterion("ctc_ce", dataclass=CtcCeCriterionConfig) +class CtcCeCriterion(FairseqCriterion): + def __init__(self, cfg: CtcCeCriterionConfig, task: FairseqTask): + super().__init__(task) + self.blank_idx = ( + task.target_dictionary.index(task.blank_symbol) + if hasattr(task, "blank_symbol") + else 0 + ) + self.pad_idx = task.target_dictionary.pad() + self.eos_idx = task.target_dictionary.eos() + self.post_process = cfg.post_process + + if cfg.wer_args is not None: + ( + cfg.wer_kenlm_model, + cfg.wer_lexicon, + cfg.wer_lm_weight, + cfg.wer_word_score, + ) = eval(cfg.wer_args) + + if cfg.wer_kenlm_model is not None: + from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder + + dec_args = Namespace() + dec_args.nbest = 1 + dec_args.criterion = "ctc" + dec_args.kenlm_model = cfg.wer_kenlm_model + dec_args.lexicon = cfg.wer_lexicon + dec_args.beam = 50 + dec_args.beam_size_token = min(50, len(task.target_dictionary)) + dec_args.beam_threshold = min(50, len(task.target_dictionary)) + dec_args.lm_weight = cfg.wer_lm_weight + dec_args.word_score = cfg.wer_word_score + dec_args.unk_weight = -math.inf + dec_args.sil_weight = 0 + + self.w2l_decoder = W2lKenLMDecoder(dec_args, task.target_dictionary) + else: + self.w2l_decoder = None + + self.zero_infinity = cfg.zero_infinity + self.sentence_avg = cfg.sentence_avg + + self.dec_weight = cfg.dec_weight + self.report_accuracy = cfg.report_accuracy + self.ignore_prefix_size = cfg.ignore_prefix_size + self.eps = cfg.label_smoothing + + def forward(self, model, sample, reduce=True): + net_output = model(**sample["net_input"]) + lprobs = model.get_normalized_probs( + net_output, log_probs=True + ).contiguous() # (T, B, C) from the encoder + + if "src_lengths" in sample["net_input"]: + input_lengths = sample["net_input"]["src_lengths"] + else: + if net_output["padding_mask"] is not None: + non_padding_mask = ~net_output["padding_mask"] + input_lengths = non_padding_mask.long().sum(-1) + else: + input_lengths = lprobs.new_full( + (lprobs.size(1),), lprobs.size(0), dtype=torch.long + ) + + pad_mask = (sample["target"] != self.pad_idx) & ( + sample["target"] != self.eos_idx + ) + targets_flat = sample["target"].masked_select(pad_mask) + if "target_lengths" in sample: + 
target_lengths = sample["target_lengths"] + else: + target_lengths = pad_mask.sum(-1) + + with torch.backends.cudnn.flags(enabled=False): + loss = F.ctc_loss( + lprobs, + targets_flat, + input_lengths, + target_lengths, + blank=self.blank_idx, + reduction="sum", + zero_infinity=self.zero_infinity, + ) + + ntokens = ( + sample["ntokens"] if "ntokens" in sample else target_lengths.sum().item() + ) + + sample_size = sample["target"].size(0) if self.sentence_avg else ntokens + + logging_output = {} + if "decoder_target" in sample: + dec_sample_size = sample["target"].size(0) if self.sentence_avg else sample["dec_ntokens"] + dec_loss, dec_nll_loss = self.compute_ce_loss(model, net_output["decoder_out"], sample, reduce=reduce) + logging_output["ctc_loss"] = loss.item() + loss = (1 - self.dec_weight) * loss + (self.dec_weight * dec_loss * sample_size / dec_sample_size) + logging_output["dec_loss"] = dec_loss.item() + logging_output["dec_nll_loss"] = dec_nll_loss.item() + logging_output["dec_sample_size"] = dec_sample_size + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output["decoder_out"], sample) + logging_output["dec_n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + + logging_output = { + "loss": utils.item(loss.data), # * sample['ntokens'], + "ntokens": ntokens, + "nsentences": sample["id"].numel(), + "sample_size": sample_size, + **logging_output, + } + + if not model.training: + import editdistance + + with torch.no_grad(): + lprobs_t = lprobs.transpose(0, 1).float().contiguous().cpu() + + c_err = 0 + c_len = 0 + w_errs = 0 + w_len = 0 + wv_errs = 0 + for lp, t, inp_l in zip( + lprobs_t, + sample["target_label"] + if "target_label" in sample + else sample["target"], + input_lengths, + ): + lp = lp[:inp_l].unsqueeze(0) + + decoded = None + if self.w2l_decoder is not None: + decoded = self.w2l_decoder.decode(lp) + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + + p = (t != self.task.target_dictionary.pad()) & ( + t != self.task.target_dictionary.eos() + ) + targ = t[p] + targ_units = self.task.target_dictionary.string(targ) + targ_units_arr = targ.tolist() + + toks = lp.argmax(dim=-1).unique_consecutive() + pred_units_arr = toks[toks != self.blank_idx].tolist() + + c_err += editdistance.eval(pred_units_arr, targ_units_arr) + c_len += len(targ_units_arr) + + targ_words = post_process(targ_units, self.post_process).split() + + pred_units = self.task.target_dictionary.string(pred_units_arr) + pred_words_raw = post_process(pred_units, self.post_process).split() + + if decoded is not None and "words" in decoded: + pred_words = decoded["words"] + w_errs += editdistance.eval(pred_words, targ_words) + wv_errs += editdistance.eval(pred_words_raw, targ_words) + else: + dist = editdistance.eval(pred_words_raw, targ_words) + w_errs += dist + wv_errs += dist + + w_len += len(targ_words) + + logging_output["wv_errors"] = wv_errs + logging_output["w_errors"] = w_errs + logging_output["w_total"] = w_len + logging_output["c_errors"] = c_err + logging_output["c_total"] = c_len + + return loss, sample_size, logging_output + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.pad_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, 
model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.pad_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = sample["decoder_target"] + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs)) + nsentences = utils.item( + sum(log.get("nsentences", 0) for log in logging_outputs) + ) + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar("ntokens", ntokens) + metrics.log_scalar("nsentences", nsentences) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + + c_errors = sum(log.get("c_errors", 0) for log in logging_outputs) + metrics.log_scalar("_c_errors", c_errors) + c_total = sum(log.get("c_total", 0) for log in logging_outputs) + metrics.log_scalar("_c_total", c_total) + w_errors = sum(log.get("w_errors", 0) for log in logging_outputs) + metrics.log_scalar("_w_errors", w_errors) + wv_errors = sum(log.get("wv_errors", 0) for log in logging_outputs) + metrics.log_scalar("_wv_errors", wv_errors) + w_total = sum(log.get("w_total", 0) for log in logging_outputs) + metrics.log_scalar("_w_total", w_total) + + if c_total > 0: + metrics.log_derived( + "uer", + lambda meters: safe_round( + meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3 + ) + if meters["_c_total"].sum > 0 + else float("nan"), + ) + if w_total > 0: + metrics.log_derived( + "wer", + lambda meters: safe_round( + meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + metrics.log_derived( + "raw_wer", + lambda meters: safe_round( + meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + + if "dec_loss" in logging_outputs[0]: + ctc_loss_sum = sum(log.get("ctc_loss", 0) for log in logging_outputs) + dec_loss_sum = sum(log.get("dec_loss", 0) for log in logging_outputs) + dec_nll_loss_sum = sum(log.get("dec_nll_loss", 0) for log in logging_outputs) + dec_sample_size = sum(log.get("dec_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "dec_loss", dec_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_scalar( + "ctc_loss", ctc_loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar( + "dec_nll_loss", dec_nll_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_derived( + "dec_ppl", lambda meters: utils.get_perplexity(meters["dec_nll_loss"].avg) + 
) + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("dec_n_correct", n_correct) + metrics.log_derived( + "dec_accuracy", + lambda meters: round( + meters["dec_n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/SpeechT5/Speech2C/speech2c/criterions/speech2c_criterion.py b/SpeechT5/Speech2C/speech2c/criterions/speech2c_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..f6a695fc04024df3f2b5f8d87077484491c90d84 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/criterions/speech2c_criterion.py @@ -0,0 +1,261 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import math +import re +from dataclasses import dataclass, field + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.criterions.hubert_criterion import HubertCriterionConfig + +@dataclass +class Speech2cCriterionConfig(HubertCriterionConfig): + dec_weight: float = field( + default=1.0, + metadata={"help": "weights for decoder CE Loss, loss will be (hubert_loss + dec_weight * CE_Loss)"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + label_smoothing: float = field( + default=0.0, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + + +@register_criterion("speech2c", dataclass=Speech2cCriterionConfig) +class Speech2cCriterion(FairseqCriterion): + def __init__(self, task, pred_masked_weight, pred_nomask_weight, loss_weights=None, log_keys=None, dec_weight=1.0, report_accuracy=False, ignore_prefix_size=0, label_smoothing=0.0): + super().__init__(task) + self.pred_masked_weight = pred_masked_weight + self.pred_nomask_weight = pred_nomask_weight + self.loss_weights = loss_weights + self.log_keys = [] if log_keys is None else log_keys + self.dec_weight = dec_weight + self.report_accuracy = report_accuracy + self.ignore_prefix_size = ignore_prefix_size + self.eps = label_smoothing + self.padding_idx = task.dictionaries[0].pad() + + def forward(self, model, sample, reduce=True, log_pred=False): + """Compute the loss for the given sample. 
+ Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + net_output = model(target_list=sample["target_list"], **sample["net_input"]) + loss = 0.0 + sample_size = 0 + logging_output = {} + reduction = "sum" if reduce else "none" + + loss_m_list = [] + logp_m_list = model.get_logits(net_output, True) + targ_m_list = model.get_targets(net_output, True) + assert self.pred_masked_weight == 0 or len(logp_m_list) > 0 + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"loss_m_{i}"] = loss_m.detach().item() + if self.pred_masked_weight > 0: + loss += self.pred_masked_weight * sum(loss_m_list) + sample_size += targ_m_list[0].numel() + + loss_u_list = [] + logp_u_list = model.get_logits(net_output, False) + targ_u_list = model.get_targets(net_output, False) + assert self.pred_nomask_weight == 0 or len(logp_u_list) > 0 + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"loss_u_{i}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss += self.pred_nomask_weight * sum(loss_u_list) + sample_size += targ_u_list[0].numel() + + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len( + self.loss_weights + ), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss += p + logging_output[f"loss_{n}"] = p.item() + + if "decoder_target" in sample: + dec_sample_size = sample["dec_ntokens"] + dec_loss, dec_nll_loss = self.compute_ce_loss(model, net_output["decoder_out"], sample, reduce=reduce) + loss = loss + (self.dec_weight * dec_loss * sample_size / dec_sample_size) + logging_output["dec_loss"] = dec_loss.item() + logging_output["dec_nll_loss"] = dec_nll_loss.item() + logging_output["dec_sample_size"] = dec_sample_size + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output["decoder_out"], sample) + logging_output["dec_n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + + logging_output = { + "loss": loss.item() if reduce else loss, + "ntokens": sample_size, + "nsentences": sample["id"].numel(), + "sample_size": sample_size, + **logging_output, + } + + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + def compute_correct(logits): + if logits.numel() == 0: + return 0, 0 + else: + assert logits.dim() > 1, logits.shape + max = logits.argmax(-1) == 0 + min = logits.argmin(-1) == 0 + both = max & min + corr = max.long().sum().item() - both.long().sum().item() + count = max.numel() + return corr, count + + with torch.no_grad(): + for i, logp_m in enumerate(logp_m_list): + corr_m, count_m = compute_correct(logp_m) + logging_output[f"correct_m_{i}"] = corr_m + logging_output[f"count_m_{i}"] = count_m + + for i, logp_u in enumerate(logp_u_list): + 
corr_u, count_u = compute_correct(logp_u) + logging_output[f"correct_u_{i}"] = corr_u + logging_output[f"count_u_{i}"] = count_u + + return loss, sample_size, logging_output + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.padding_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.padding_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = sample["decoder_target"] + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training (copied from normal cross entropy).""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar("loss", loss_sum / sample_size / math.log(2), sample_size, round=3) + if sample_size != ntokens: + metrics.log_scalar("nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3) + metrics.log_derived("ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)) + else: + metrics.log_derived("ppl", lambda meters: utils.get_perplexity(meters["loss"].avg)) + + counts = {} + for lk in logging_outputs[0].keys(): + if lk.startswith("count_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val) + counts[lk] = val + + for lk in logging_outputs[0].keys(): + if lk.startswith("loss_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val / sample_size / math.log(2), round=3) + elif lk.startswith("correct_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val / counts[re.sub("correct", "count", lk)]) + + if "dec_loss" in logging_outputs[0]: + dec_loss_sum = sum(log.get("dec_loss", 0) for log in logging_outputs) + dec_nll_loss_sum = sum(log.get("dec_nll_loss", 0) for log in logging_outputs) + dec_sample_size = sum(log.get("dec_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "dec_loss", dec_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_scalar( + "dec_nll_loss", dec_nll_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_derived( + "dec_ppl", lambda meters: utils.get_perplexity(meters["dec_nll_loss"].avg) + ) + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("dec_n_correct", n_correct) + metrics.log_derived( + "dec_accuracy", + 
lambda meters: round( + meters["dec_n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + ) + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + raise NotImplementedError() + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return False diff --git a/SpeechT5/Speech2C/speech2c/data/speech2c_dataset.py b/SpeechT5/Speech2C/speech2c/data/speech2c_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7af1303b0faa145d19e0bdf1d0a1ed9db61ad625 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/data/speech2c_dataset.py @@ -0,0 +1,145 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import logging +from typing import Any, List, Optional, Union + +import torch +from fairseq.data import data_utils, Dictionary +from fairseq.data.audio.hubert_dataset import HubertDataset +logger = logging.getLogger(__name__) + + +class Speech2cDataset(HubertDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + random_crop: bool = False, + single_target: bool = False, + tgt_dict: Optional[Dictionary] = None, + add_decoder: bool = False, + fine_tuning: bool = False, + ): + super().__init__( + manifest_path, + sample_rate, + label_paths, + label_rates, + pad_list, + eos_list, + label_processors, + max_keep_sample_size, + min_keep_sample_size, + max_sample_size, + shuffle, + pad_audio, + normalize, + store_labels, + random_crop, + single_target + ) + + self.tgt_dict = tgt_dict + self.add_decoder = add_decoder + self.fine_tuning = fine_tuning + + def collater(self, samples): + # target = max(sizes) -> random_crop not used + # target = max_sample_size -> random_crop used for long + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + audios = [s["source"] for s in samples] + audio_sizes = [len(s) for s in audios] + if self.pad_audio: + audio_size = min(max(audio_sizes), self.max_sample_size) + else: + audio_size = min(min(audio_sizes), self.max_sample_size) + collated_audios, padding_mask, audio_starts = self.collater_audio( + audios, audio_size + ) + + targets_by_label = [ + [s["label_list"][i] for s in samples] for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, audio_size, audio_starts + ) + + if self.add_decoder: + if self.fine_tuning: + decoder_label = [ + 
torch.cat((targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + else: + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]].unique_consecutive(), torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + dec_ntokens = sum(x.size(0) for x in decoder_label) + decoder_target = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos(), + left_pad=False, + move_eos_to_beginning=False, + ) + decoder_target_lengths = torch.tensor( + [x.size(0) for x in decoder_label], dtype=torch.long + ) + prev_output_tokens = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos(), + left_pad=False, + move_eos_to_beginning=True, + ) + net_input = { + "source": collated_audios, + "padding_mask": padding_mask, + "prev_output_tokens": prev_output_tokens, + } + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + "decoder_target": decoder_target, + "decoder_target_lengths": decoder_target_lengths, + "dec_ntokens": dec_ntokens, + } + else: + net_input = {"source": collated_audios, "padding_mask": padding_mask} + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + return batch diff --git a/SpeechT5/Speech2C/speech2c/models/modules/ctc_prefix_score.py b/SpeechT5/Speech2C/speech2c/models/modules/ctc_prefix_score.py new file mode 100644 index 0000000000000000000000000000000000000000..b42cbd819abf7bdd718bef3db3f553c8360ac384 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/models/modules/ctc_prefix_score.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Mitsubishi Electric Research Labs (Takaaki Hori) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import numpy as np +import six + + +class CTCPrefixScore(object): + """Compute CTC label sequence scores + which is based on Algorithm 2 in WATANABE et al. + "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION," + but extended to efficiently compute the probablities of multiple labels + simultaneously + """ + + def __init__(self, x, blank, eos, xp): + self.xp = xp + self.logzero = -10000000000.0 + self.blank = blank + self.eos = eos + self.input_length = len(x) + self.x = x + + def initial_state(self): + """Obtain an initial CTC state + :return: CTC state + """ + # initial CTC state is made of a frame x 2 tensor that corresponds to + # r_t^n() and r_t^b(), where 0 and 1 of axis=1 represent + # superscripts n and b (non-blank and blank), respectively. + r = self.xp.full((self.input_length, 2), self.logzero, dtype=np.float32) + r[0, 1] = self.x[0, self.blank] + for i in six.moves.range(1, self.input_length): + r[i, 1] = r[i - 1, 1] + self.x[i, self.blank] + return r + + def __call__(self, y, cs, r_prev): + """Compute CTC prefix scores for next labels + :param y : prefix label sequence + :param cs : array of next labels + :param r_prev: previous CTC state + :return ctc_scores, ctc_states + """ + # initialize CTC states + output_length = len(y) - 1 # ignore sos + # new CTC states are prepared as a frame x (n or b) x n_labels tensor + # that corresponds to r_t^n(h) and r_t^b(h). 
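+ # Note on the recursion below: r has shape (input_length, 2, len(cs)) and
+ # holds the non-blank/blank forward variables for every candidate label,
+ # while xs = x[:, cs] gathers the per-frame log-probabilities of the
+ # candidates. log_phi(t) is log(r_t^n(g) + r_t^b(g)), except that a
+ # candidate equal to the last label of the prefix can only be reached
+ # through a blank, so it uses r_t^b(g) alone.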
+ r = self.xp.ndarray((self.input_length, 2, len(cs)), dtype=np.float32) + xs = self.x[:, cs] + if output_length == 0: + r[0, 0] = xs[0] + r[0, 1] = self.logzero + else: + r[output_length - 1] = self.logzero + + # prepare forward probabilities for the last label + r_sum = self.xp.logaddexp( + r_prev[:, 0], r_prev[:, 1] + ) # log(r_t^n(g) + r_t^b(g)) + last = y[-1] + if output_length > 0 and last in cs: + log_phi = self.xp.ndarray((self.input_length, len(cs)), dtype=np.float32) + for i in six.moves.range(len(cs)): + log_phi[:, i] = r_sum if cs[i] != last else r_prev[:, 1] + else: + log_phi = r_sum + + # compute forward probabilities log(r_t^n(h)), log(r_t^b(h)), + # and log prefix probabilities log(psi) + start = max(output_length, 1) + log_psi = r[start - 1, 0] + for t in six.moves.range(start, self.input_length): + r[t, 0] = self.xp.logaddexp(r[t - 1, 0], log_phi[t - 1]) + xs[t] + r[t, 1] = ( + self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) + self.x[t, self.blank] + ) + log_psi = self.xp.logaddexp(log_psi, log_phi[t - 1] + xs[t]) + + # get P(...eos|X) that ends with the prefix itself + eos_pos = self.xp.where(cs == self.eos)[0] + if len(eos_pos) > 0: + log_psi[eos_pos] = r_sum[-1] # log(r_T^n(g) + r_T^b(g)) + + # exclude blank probs + blank_pos = self.xp.where(cs == self.blank)[0] + if len(blank_pos) > 0: + log_psi[blank_pos] = self.logzero + + # return the log prefix probability and CTC states, where the label axis + # of the CTC states is moved to the first axis to slice it easily + return log_psi, self.xp.rollaxis(r, 2) diff --git a/SpeechT5/Speech2C/speech2c/models/modules/multihead_attention.py b/SpeechT5/Speech2C/speech2c/models/modules/multihead_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..7b1c1445037ada5aef5b8cf9fd3b63b05d95aca1 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/models/modules/multihead_attention.py @@ -0,0 +1,341 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +from typing import Dict, Optional, Tuple + +import torch +import torch.nn.functional as F +from fairseq import utils +from torch import Tensor + +from fairseq.modules import MultiheadAttention as FairseqMultiheadAttention + + +class MultiheadAttention(FairseqMultiheadAttention): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. 
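+
+ This subclass adds an optional ``position_bias`` argument: when it is
+ given, a relative-position term (the query projected onto the transposed
+ position embeddings) is added to the attention logits before the softmax,
+ and the fused ``F.multi_head_attention_forward`` fast path is skipped.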
+ """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + q_noise=0.0, + qn_block_size=8, + ): + super().__init__( + embed_dim, + num_heads, + kdim, + vdim, + dropout, + bias, + add_bias_kv, + add_zero_attn, + self_attention, + encoder_decoder_attention, + q_noise, + qn_block_size, + ) + + def forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + need_weights: bool = True, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + before_softmax: bool = False, + need_head_weights: bool = False, + position_bias: Optional[Tensor] = None, + ) -> Tuple[Tensor, Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. + need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). + before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. + """ + if need_head_weights: + need_weights = True + + is_tpu = query.device.type == "xla" + + tgt_len, bsz, embed_dim = query.size() + src_len = tgt_len + assert embed_dim == self.embed_dim, f"query dim {embed_dim} != {self.embed_dim}" + assert list(query.size()) == [tgt_len, bsz, embed_dim] + if key is not None: + src_len, key_bsz, _ = key.size() + if not torch.jit.is_scripting(): + assert key_bsz == bsz + assert value is not None + assert src_len, bsz == value.shape[:2] + + if ( + not self.onnx_trace + and not is_tpu # don't use PyTorch version on TPUs + and incremental_state is None + and not static_kv + # A workaround for quantization to work. Otherwise JIT compilation + # treats bias in linear module as method. 
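+ # The fast path is likewise skipped when position_bias is given, since the
+ # fused kernel offers no hook for adding a relative-position term to the
+ # attention logits.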
+ and not torch.jit.is_scripting() + and position_bias is None + ): + assert key is not None and value is not None + return F.multi_head_attention_forward( + query, + key, + value, + self.embed_dim, + self.num_heads, + torch.empty([0]), + torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), + self.bias_k, + self.bias_v, + self.add_zero_attn, + self.dropout_module.p, + self.out_proj.weight, + self.out_proj.bias, + self.training or self.dropout_module.apply_during_inference, + key_padding_mask, + need_weights, + attn_mask, + use_separate_proj_weight=True, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + ) + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + key_padding_mask.new_zeros(key_padding_mask.size(0), 1), + ], + dim=1, + ) + + q = ( + q.contiguous() + .view(tgt_len, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if k is not None: + k = ( + k.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if v is not None: + v = ( + v.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + assert k is not None + k = torch.cat([prev_key, k], dim=1) + src_len = k.size(1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + + saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_value"] = v.view(bsz, 
self.num_heads, -1, self.head_dim) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + assert k.size(1) == src_len + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. + if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + assert v is not None + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + torch.zeros(key_padding_mask.size(0), 1).type_as( + key_padding_mask + ), + ], + dim=1, + ) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + if position_bias is not None: ## first order + ## position_bias: [241, 241, 64] + #print ("attn_weights: ", attn_weights.size()) # [492, 241, 241] + reshape_q = q.contiguous().view(bsz * self.num_heads, -1, self.head_dim).transpose(0,1) #[241, 492, 64] + #print ("reshape_q: ", reshape_q.size()) + B = torch.matmul(reshape_q, position_bias.transpose(-2, -1)) + #print ("B: ", B.size()) ## [241, 492, 241] + #B = B.transpose(0, 1).view(bsz, self.num_heads, position_bias.size(0), position_bias.size(1)) + B = B.transpose(0, 1).view(bsz*self.num_heads, position_bias.size(0), position_bias.size(1)) + #print ("B 2: ", B.size()) + attn_weights += B + + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if not is_tpu: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if before_softmax: + return attn_weights, v + + attn_weights_float = utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace + ) + attn_weights = attn_weights_float.type_as(attn_weights) + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + if self.onnx_trace and attn.size(1) == 1: + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, embed_dim) + else: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn = self.out_proj(attn) + attn_weights: Optional[Tensor] = None + if need_weights: + attn_weights = attn_weights_float.view( + 
bsz, self.num_heads, tgt_len, src_len + ).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + + return attn, attn_weights diff --git a/SpeechT5/Speech2C/speech2c/models/modules/relative_pos_enc.py b/SpeechT5/Speech2C/speech2c/models/modules/relative_pos_enc.py new file mode 100644 index 0000000000000000000000000000000000000000..2a073ebf2893e9e9b092aa520bdaf927e9388c2b --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/models/modules/relative_pos_enc.py @@ -0,0 +1,35 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import torch + +class RelativePositionalEncoding(torch.nn.Module): + def __init__(self, d_model, maxlen=1000, embed_v=False): + super(RelativePositionalEncoding, self).__init__() + + self.d_model = d_model + self.maxlen = maxlen + self.pe_k = torch.nn.Embedding(2*maxlen, d_model) + if embed_v: + self.pe_v = torch.nn.Embedding(2*maxlen, d_model) + self.embed_v = embed_v + + + def forward(self, pos_seq, incremental_state=None): + pos_seq[pos_seq < -self.maxlen] = -self.maxlen + pos_seq[pos_seq >= self.maxlen] = self.maxlen - 1 + pos_seq = pos_seq + self.maxlen + + if incremental_state is not None: + pos_seq = pos_seq[-1:] + + if self.embed_v: + return self.pe_k(pos_seq), self.pe_v(pos_seq) + else: + return self.pe_k(pos_seq), None diff --git a/SpeechT5/Speech2C/speech2c/models/modules/transformer_decoder.py b/SpeechT5/Speech2C/speech2c/models/modules/transformer_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..aaf4dce4ac717453bf4c37f3f393092ea53ef062 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/models/modules/transformer_decoder.py @@ -0,0 +1,485 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import math +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqIncrementalDecoder +from fairseq.models.transformer import TransformerConfig +from fairseq.models.transformer.transformer_decoder import module_name_fordropout, Linear +from fairseq.modules import ( + AdaptiveSoftmax, + BaseLayer, + FairseqDropout, + LayerDropModuleList, + LayerNorm, + PositionalEmbedding, + SinusoidalPositionalEmbedding, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor + + +from speech2c.models.modules.transformer_decoder_layer import TransformerDecoderLayerBase +from speech2c.models.modules.relative_pos_enc import RelativePositionalEncoding + + +class 
TransformerDecoderBase(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *cfg.decoder.layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, + cfg, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + use_rel_pos_enc=False, + ): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + self._future_mask = torch.empty(0) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.decoder_layerdrop = cfg.decoder.layerdrop + self.share_input_output_embed = cfg.share_decoder_input_output_embed + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = cfg.decoder.embed_dim + self.embed_dim = embed_dim + self.output_embed_dim = cfg.decoder.output_dim + + self.padding_idx = embed_tokens.padding_idx + self.max_target_positions = cfg.max_target_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + self.embed_positions = ( + PositionalEmbedding( + self.max_target_positions, + embed_dim, + self.padding_idx, + learned=cfg.decoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + self.cross_self_attention = cfg.cross_self_attention + + self.use_rel_pos_enc = use_rel_pos_enc + if self.decoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.layers.extend( + [ + self.build_decoder_layer(cfg, no_encoder_attn) + for _ in range(cfg.decoder.layers) + ] + ) + self.num_layers = len(self.layers) + + if cfg.decoder.normalize_before and not cfg.no_decoder_final_norm: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + + self.project_out_dim = ( + Linear(embed_dim, self.output_embed_dim, bias=False) + if embed_dim != self.output_embed_dim and not cfg.tie_adaptive_weights + else None + ) + + self.adaptive_softmax = None + self.output_projection = output_projection + if self.output_projection is None: + self.build_output_projection(cfg, dictionary, embed_tokens) + + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(self.embed_dim // cfg.decoder.attention_heads, 24) + + def build_output_projection(self, cfg, dictionary, embed_tokens): + if cfg.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + utils.eval_str_list(cfg.adaptive_softmax_cutoff, type=int), + dropout=cfg.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if cfg.tie_adaptive_weights else None, + factor=cfg.adaptive_softmax_factor, + 
tie_proj=cfg.tie_adaptive_proj, + ) + elif self.share_input_output_embed: + self.output_projection = nn.Linear( + self.embed_tokens.weight.shape[1], + self.embed_tokens.weight.shape[0], + bias=False, + ) + self.output_projection.weight = self.embed_tokens.weight + else: + self.output_projection = nn.Linear( + self.output_embed_dim, len(dictionary), bias=False + ) + nn.init.normal_( + self.output_projection.weight, mean=0, std=self.output_embed_dim ** -0.5 + ) + num_base_layers = cfg.base_layers + for i in range(num_base_layers): + self.layers.insert( + ((i + 1) * cfg.decoder.layers) // (num_base_layers + 1), + BaseLayer(cfg), + ) + + def build_decoder_layer(self, cfg, no_encoder_attn=False): + layer = TransformerDecoderLayerBase(cfg, no_encoder_attn, has_relative_attention_bias=self.use_rel_pos_enc) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention, should be of size T x B x C + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + ) + + if not features_only: + x = self.output_layer(x) + return x, extra + + def extract_features_scriptable( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + """ + Similar to *forward* but only return features. + + Includes several features from "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + alignment_layer (int, optional): return mean alignment over + heads at this layer (default: last layer). + alignment_heads (int, optional): only average alignment over + this many heads (default: all heads). 
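+ When the decoder is built with *use_rel_pos_enc*, relative position
+ embeddings are computed over the full target length and passed to every
+ decoder layer as *pos_bias*.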
+ + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + bs, slen = prev_output_tokens.size() + if alignment_layer is None: + alignment_layer = self.num_layers - 1 + + enc: Optional[Tensor] = None + padding_mask: Optional[Tensor] = None + if encoder_out is not None and len(encoder_out["encoder_out"]) > 0: + enc = encoder_out["encoder_out"][0] + assert ( + enc.size()[1] == bs + ), f"Expected enc.shape == (t, {bs}, c) got {enc.shape}" + if encoder_out is not None and len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + + # embed positions + positions = None + if self.embed_positions is not None: + positions = self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.quant_noise is not None: + x = self.quant_noise(x) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + pos_seq = torch.arange(0, slen).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, _ = self.pos_emb(pos_seq, incremental_state) + else: + pos_k = None + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any(): + self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx) + + # decoder layers + attn: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, _ = layer( + x, + enc, + padding_mask, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer)), + need_head_weights=bool((idx == alignment_layer)), + pos_bias=pos_k, + ) + inner_states.append(x) + if layer_attn is not None and idx == alignment_layer: + attn = layer_attn.float().to(x) + + if attn is not None: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": [attn], "inner_states": inner_states} + + def output_layer(self, features): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + return self.output_projection(features) + else: + return features + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embed_positions is None: + return self.max_target_positions + return min(self.max_target_positions, self.embed_positions.max_positions) + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround. 
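+ # Rebuild the cached upper-triangular -inf (causal) mask whenever it is
+ # empty, sits on the wrong device, or is smaller than the current target
+ # length; otherwise reuse the buffer and slice it to the needed size.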
+ if ( + self._future_mask.size(0) == 0 + or (not self._future_mask.device == tensor.device) + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(torch.zeros([dim, dim])), 1 + ) + self._future_mask = self._future_mask.to(tensor) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + + if f"{name}.output_projection.weight" not in state_dict: + if self.share_input_output_embed: + embed_out_key = f"{name}.embed_tokens.weight" + else: + embed_out_key = f"{name}.embed_out" + if embed_out_key in state_dict: + state_dict[f"{name}.output_projection.weight"] = state_dict[ + embed_out_key + ] + if not self.share_input_output_embed: + del state_dict[embed_out_key] + + for i in range(self.num_layers): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +class TransformerDecoder(TransformerDecoderBase): + def __init__( + self, + args, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + ): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + no_encoder_attn=no_encoder_attn, + output_projection=output_projection, + use_rel_pos_enc=args.use_rel_pos_enc, + ) + + def build_output_projection(self, args, dictionary, embed_tokens): + super().build_output_projection( + TransformerConfig.from_namespace(args), dictionary, embed_tokens + ) + + def build_decoder_layer(self, args, no_encoder_attn=False): + return super().build_decoder_layer( + TransformerConfig.from_namespace(args), no_encoder_attn=no_encoder_attn + ) + +class TransformerDecoderScriptable(TransformerDecoder): + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + return x, None + diff --git a/SpeechT5/Speech2C/speech2c/models/modules/transformer_decoder_layer.py b/SpeechT5/Speech2C/speech2c/models/modules/transformer_decoder_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..780bb43d8d3aaf456c0ae4cf5223b9b7eae599e8 --- /dev/null +++ 
b/SpeechT5/Speech2C/speech2c/models/modules/transformer_decoder_layer.py @@ -0,0 +1,215 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +from typing import Dict, List, Optional + +import torch +from torch import Tensor +from fairseq.modules.transformer_layer import TransformerDecoderLayerBase as FairseqTransformerDecoderLayerBase +from fairseq.modules import LayerNorm + +from speech2c.models.modules.multihead_attention import MultiheadAttention + + +class TransformerDecoderLayerBase(FairseqTransformerDecoderLayerBase): + """Decoder layer block. + + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.decoder.normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, cfg, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False, has_relative_attention_bias=False + ): + super().__init__( + cfg, + no_encoder_attn, + add_bias_kv, + add_zero_attn, + ) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim // cfg.decoder.attention_heads) + + def build_self_attention( + self, embed_dim, cfg, add_bias_kv=False, add_zero_attn=False + ): + return MultiheadAttention( + embed_dim, + cfg.decoder.attention_heads, + dropout=cfg.attention_dropout, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=not cfg.cross_self_attention, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + ) + + def forward( + self, + x, + encoder_out: Optional[torch.Tensor] = None, + encoder_padding_mask: Optional[torch.Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + prev_self_attn_state: Optional[List[torch.Tensor]] = None, + prev_attn_state: Optional[List[torch.Tensor]] = None, + self_attn_mask: Optional[torch.Tensor] = None, + self_attn_padding_mask: Optional[torch.Tensor] = None, + need_attn: bool = False, + need_head_weights: bool = False, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor, optional): binary + ByteTensor of shape `(batch, src_len)` where padding + elements are indicated by ``1``. + need_attn (bool, optional): return attention weights + need_head_weights (bool, optional): return attention weights + for each head (default: return average over heads). 
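+ pos_bias (Tensor, optional): relative position embeddings that are
+ layer-normalized by ``norm_k`` and passed to the self-attention
+ module as ``position_bias``.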
+ Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if need_head_weights: + need_attn = True + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + if prev_self_attn_state is not None: + prev_key, prev_value = prev_self_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_self_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_self_attn_state[2] + assert incremental_state is not None + self.self_attn._set_input_buffer(incremental_state, saved_state) + _self_attn_input_buffer = self.self_attn._get_input_buffer(incremental_state) + if self.cross_self_attention and not ( + incremental_state is not None + and _self_attn_input_buffer is not None + and "prev_key" in _self_attn_input_buffer + ): + if self_attn_mask is not None: + assert encoder_out is not None + self_attn_mask = torch.cat( + (x.new_zeros(x.size(0), encoder_out.size(0)), self_attn_mask), dim=1 + ) + if self_attn_padding_mask is not None: + if encoder_padding_mask is None: + assert encoder_out is not None + encoder_padding_mask = self_attn_padding_mask.new_zeros( + encoder_out.size(1), encoder_out.size(0) + ) + self_attn_padding_mask = torch.cat( + (encoder_padding_mask, self_attn_padding_mask), dim=1 + ) + assert encoder_out is not None + y = torch.cat((encoder_out, x), dim=0) + else: + y = x + + x, attn = self.self_attn( + query=x, + key=y, + value=y, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + if self.c_attn is not None: + tgt_len, bsz = x.size(0), x.size(1) + x = x.view(tgt_len, bsz, self.nh, self.head_dim) + x = torch.einsum("tbhd,h->tbhd", x, self.c_attn) + x = x.reshape(tgt_len, bsz, self.embed_dim) + if self.attn_ln is not None: + x = self.attn_ln(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + if self.encoder_attn is not None and encoder_out is not None: + residual = x + if self.normalize_before: + x = self.encoder_attn_layer_norm(x) + if prev_attn_state is not None: + prev_key, prev_value = prev_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_attn_state[2] + assert incremental_state is not None + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=need_attn or (not self.training and self.need_attn), + need_head_weights=need_head_weights, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.encoder_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + if self.ffn_layernorm is not None: + x = self.ffn_layernorm(x) + x = self.fc2(x) + x = self.dropout_module(x) + if self.w_resid is not None: + residual = torch.mul(self.w_resid, residual) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + if self.onnx_trace and 
incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + assert saved_state is not None + if self_attn_padding_mask is not None: + self_attn_state = [ + saved_state["prev_key"], + saved_state["prev_value"], + saved_state["prev_key_padding_mask"], + ] + else: + self_attn_state = [saved_state["prev_key"], saved_state["prev_value"]] + return x, attn, self_attn_state + return x, attn, None + + def make_generation_fast_(self, need_attn: bool = False, **kwargs): + self.need_attn = need_attn diff --git a/SpeechT5/Speech2C/speech2c/models/modules/transformer_encoder.py b/SpeechT5/Speech2C/speech2c/models/modules/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..6916c7960cf5bf6fc4fc60257ddb377bfea368fc --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/models/modules/transformer_encoder.py @@ -0,0 +1,278 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import math + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.dataclass import ChoiceEnum +from fairseq.modules import ( + LayerNorm, + MultiheadAttention, + SamePad, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.transformer_sentence_encoder import init_bert_params +from fairseq.utils import index_put +from fairseq.distributed import fsdp_wrap +from fairseq.models.wav2vec.utils import pad_to_multiple +from fairseq.models.wav2vec.wav2vec2 import TransformerEncoder as W2vTransformerEncoder + +from speech2c.models.modules.relative_pos_enc import RelativePositionalEncoding +from speech2c.models.modules.multihead_attention import MultiheadAttention + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) + + +class TransformerEncoder(W2vTransformerEncoder): + def __init__(self, args): + super().__init__(args) + + self.dropout = args.dropout + self.embedding_dim = args.encoder_embed_dim + self.required_seq_len_multiple = args.required_seq_len_multiple + self.use_rel_pos_enc = getattr(args, "use_rel_pos_enc", False) + + self.pos_conv = nn.Conv1d( + self.embedding_dim, + self.embedding_dim, + kernel_size=args.conv_pos, + padding=args.conv_pos // 2, + groups=args.conv_pos_groups, + ) + dropout = 0 + std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * self.embedding_dim)) + nn.init.normal_(self.pos_conv.weight, mean=0, std=std) + nn.init.constant_(self.pos_conv.bias, 0) + + self.pos_conv = nn.utils.weight_norm(self.pos_conv, name="weight", dim=2) + self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU()) + + layers = [] + for _ in range(args.encoder_layers): + layer = TransformerSentenceEncoderLayer( + embedding_dim=self.embedding_dim, + ffn_embedding_dim=args.encoder_ffn_embed_dim, + num_attention_heads=args.encoder_attention_heads, + dropout=self.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + 
layer_norm_first=args.layer_norm_first, + has_relative_attention_bias=self.use_rel_pos_enc, + ) + if args.checkpoint_activations: + layer = fsdp_wrap(layer) + layer = checkpoint_wrapper(layer) + layers.append(layer) + self.layers = nn.ModuleList(layers) + + self.layer_norm_first = args.layer_norm_first + self.layer_norm = LayerNorm(self.embedding_dim) + self.layerdrop = args.encoder_layerdrop + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(args.encoder_embed_dim // args.encoder_attention_heads, 160) + + + self.apply(init_bert_params) + + def forward(self, x, padding_mask=None, layer=None): + x, layer_results = self.extract_features(x, padding_mask, layer) + + if self.layer_norm_first and layer is None: + x = self.layer_norm(x) + + return x, layer_results + + def extract_features(self, x, padding_mask=None, tgt_layer=None): + + if padding_mask is not None: + x = index_put(x, padding_mask, 0) + + x_conv = self.pos_conv(x.transpose(1, 2)) + x_conv = x_conv.transpose(1, 2) + x = x + x_conv + + if not self.layer_norm_first: + x = self.layer_norm(x) + + # pad to the sequence length dimension + x, pad_length = pad_to_multiple( + x, self.required_seq_len_multiple, dim=-2, value=0 + ) + if pad_length > 0 and padding_mask is None: + padding_mask = x.new_zeros((x.size(0), x.size(1)), dtype=torch.bool) + padding_mask[:, -pad_length:] = True + else: + padding_mask, _ = pad_to_multiple( + padding_mask, self.required_seq_len_multiple, dim=-1, value=True + ) + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + if self.use_rel_pos_enc: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + layer_results = [] + r = None + for i, layer in enumerate(self.layers): + dropout_probability = np.random.random() + if not self.training or (dropout_probability > self.layerdrop): + x, z = layer(x, self_attn_padding_mask=padding_mask, need_weights=False, pos_bias=pos_k) + if tgt_layer is not None: + # unpad if needed + if pad_length > 0: + layer_results.append( + ( + x[:-pad_length], + z[:, :-pad_length, :-pad_length] + if z is not None + else z, + ) + ) + else: + layer_results.append((x, z)) + if i == tgt_layer: + r = x + break + + if r is not None: + x = r + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + # undo paddding + if pad_length > 0: + x = x[:, :-pad_length] + + return x, layer_results + + +class TransformerSentenceEncoderLayer(nn.Module): + """ + Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained + models. 
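+
+ When *has_relative_attention_bias* is enabled, the relative position
+ embeddings (``pos_bias``) are layer-normalized by ``norm_k`` before being
+ forwarded to the self-attention module as ``position_bias``.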
+ """ + + def __init__( + self, + embedding_dim: float = 768, + ffn_embedding_dim: float = 3072, + num_attention_heads: float = 8, + dropout: float = 0.1, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + activation_fn: str = "relu", + layer_norm_first: bool = False, + has_relative_attention_bias: bool = False, + ) -> None: + + super().__init__() + # Initialize parameters + self.embedding_dim = embedding_dim + self.dropout = dropout + self.activation_dropout = activation_dropout + + # Initialize blocks + self.activation_fn = utils.get_activation_fn(activation_fn) + self.self_attn = MultiheadAttention( + self.embedding_dim, + num_attention_heads, + dropout=attention_dropout, + self_attention=True, + ) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(self.activation_dropout) + self.dropout3 = nn.Dropout(dropout) + + self.layer_norm_first = layer_norm_first + + # layer norm associated with the self attention layer + self.self_attn_layer_norm = LayerNorm(self.embedding_dim) + self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim) + self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim) + + # layer norm associated with the position wise feed-forward NN + self.final_layer_norm = LayerNorm(self.embedding_dim) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embedding_dim//num_attention_heads) + + def forward( + self, + x: torch.Tensor, + self_attn_mask: torch.Tensor = None, + self_attn_padding_mask: torch.Tensor = None, + need_weights: bool = False, + att_args=None, + pos_bias=None, + ): + """ + LayerNorm is applied either before or after the self-attention/ffn + modules similar to the original Transformer imlementation. + """ + residual = x + + if self.layer_norm_first: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + x = self.dropout1(x) + x = residual + x + + residual = x + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + else: + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + position_bias=pos_bias, + ) + + x = self.dropout1(x) + x = residual + x + + x = self.self_attn_layer_norm(x) + + residual = x + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + x = self.final_layer_norm(x) + + return x, attn diff --git a/SpeechT5/Speech2C/speech2c/models/speech2c.py b/SpeechT5/Speech2C/speech2c/models/speech2c.py new file mode 100644 index 0000000000000000000000000000000000000000..7ec69a679451172f8e32047c1bd2275932636e65 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/models/speech2c.py @@ -0,0 +1,321 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import logging +import copy +import contextlib +from typing import Dict, List, Optional, Tuple + +import torch +from dataclasses 
import dataclass, field +from fairseq.data.dictionary import Dictionary +from fairseq.models import register_model +from fairseq.models.hubert import HubertConfig, HubertModel +from fairseq.models.transformer import Embedding +from torch import Tensor +from speech2c.tasks.speech2c_pretraining import ( + Speech2cPretrainingConfig, + Speech2cPretrainingTask, +) + +from speech2c.models.modules.transformer_decoder import TransformerDecoderScriptable +from speech2c.models.modules.transformer_encoder import TransformerEncoder + +logger = logging.getLogger(__name__) + + +@dataclass +class Speech2cConfig(HubertConfig): + use_rel_pos_enc: bool = field( + default=False, + metadata={"help": "whether to use relative positional encoding"}, + ) + + # decoder + decoder_layers: int = field( + default=6, metadata={"help": "num decoder layers in the transformer"} + ) + decoder_embed_dim: int = field( + default=768, metadata={"help": "decoder embedding dimension"} + ) + decoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "decoder embedding dimension for FFN"} + ) + decoder_attention_heads: int = field( + default=12, metadata={"help": "num decoder attention heads"} + ) + decoder_normalize_before: bool = field( + default=False, + metadata={"help": "apply layernorm before each decoder block"}, + ) + decoder_layerdrop: float = field( + default=0.0, + metadata={"help": "probability of dropping a tarnsformer layer"}, + ) + share_decoder_input_output_embed: bool = field( + default=False, + metadata={"help": "share decoder input and output embeddings"}, + ) + decoder_output_dim: int = field( + default=768, metadata={"help": "decoder output dimension"} + ) + max_target_positions: int = field( + default=3000, metadata={"help": "max target position"} + ) + no_scale_embedding: bool = field( + default=False, + metadata={"help": "not scale embedding"}, + ) + adaptive_input: bool = field( + default=False, + metadata={"help": "adaptive input"}, + ) + quant_noise_pq: int = field( + default=0, metadata={"help": "quant noise pq"} + ) + decoder_learned_pos: bool = field( + default=False, + metadata={"help": "decoder learnable positional embedding"}, + ) + no_token_positional_embeddings: bool = field( + default=False, + metadata={"help": "no token positional embeddings"}, + ) + decoder_dict_size: int = field( + default=-1, + metadata={"help": "decoder dictionary dimension, only used for fine-tuning"}, + ) + + # FP16 optimization + required_seq_len_multiple: int = field( + default=1, + metadata={ + "help": "pad the input to encoder such that the sequence length is divisible by multiple" + }, + ) + crop_seq_to_multiple: int = field( + default=1, + metadata={ + "help": "crop convolutional feature extractor output such that the sequence length is divisible by multiple" + }, + ) + + +@register_model("speech2c", dataclass=Speech2cConfig) +class Speech2cModel(HubertModel): + def __init__( + self, + cfg: Speech2cConfig, + task_cfg: Speech2cPretrainingConfig, + dictionaries: List[Dictionary], + ) -> None: + super().__init__(cfg, task_cfg, dictionaries) + logger.info(f"Speech2cModel Config: {cfg}") + + self.encoder = TransformerEncoder(cfg) + + self.add_decoder = task_cfg.add_decoder + if task_cfg.add_decoder: + def build_embedding(dictionary, embed_dim): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + return Embedding(num_embeddings, embed_dim, padding_idx) + + # To make sure that the decoder dict size is the same as the fine-tuning tgt_dict size + cut_dictionary = copy.deepcopy(dictionaries[0]) + 
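+            # The truncated copy (not the original pre-training dictionary) is what the
+            # decoder embeddings and the decoder itself are built from below, so the
+            # decoder output size matches the fine-tuning target vocabulary.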
if cfg.decoder_dict_size != -1: + cut_dictionary.symbols = cut_dictionary.symbols[:cfg.decoder_dict_size] + + decoder_embed_tokens = build_embedding( + cut_dictionary, cfg.decoder_embed_dim + ) + + self.decoder = TransformerDecoderScriptable(cfg, cut_dictionary, decoder_embed_tokens) + + + @classmethod + def build_model(cls, cfg: Speech2cConfig, task: Speech2cPretrainingTask): + """Build a new model instance.""" + + model = Speech2cModel(cfg, task.cfg, task.dictionaries) + return model + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + # net_output['encoder_out'] is a (B, T, D) tensor + lprobs = self.get_normalized_probs_scriptable(net_output, log_probs, sample) + lprobs.batch_first = True + return lprobs + + def forward( + self, + source: torch.Tensor, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + prev_output_tokens: Optional[torch.Tensor] = None, + ) -> Dict[str, torch.Tensor]: + """output layer is 1-based""" + features = self.forward_features(source) + if target_list is not None: + features, target_list = self.forward_targets(features, target_list) + + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) + features = self.layer_norm(features) + unmasked_features = features.clone() + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = self.dropout_input(features) + unmasked_features = self.dropout_features(unmasked_features) + + if mask: + x, mask_indices = self.apply_mask(features, padding_mask, target_list) + else: + x = features + mask_indices = None + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, _ = self.encoder( + x, + padding_mask=padding_mask, + layer=None if output_layer is None else output_layer - 1, + ) + + if features_only: + return {"x": x, "padding_mask": padding_mask, "features": features} + + def compute_pred(proj_x, target, label_embs): + # compute logits for the i-th label set + y = torch.index_select(label_embs, 0, target.long()) + negs = label_embs.unsqueeze(1).expand(-1, proj_x.size(0), -1) + if self.target_glu: + y = self.target_glu(y) + negs = self.target_glu(negs) + # proj_x: (S, D) + # y: (S, D) + # negs: (Neg, S, D) + return self.compute_nce(proj_x, y, negs) + + label_embs_list = self.label_embs_concat.split(self.num_classes, 0) + + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + proj_x_m = self.final_proj(x[masked_indices]) + if self.untie_final_proj: + proj_x_m_list = proj_x_m.chunk(len(target_list), dim=-1) + else: + proj_x_m_list = [proj_x_m for _ in range(len(target_list))] + logit_m_list = [ + compute_pred(proj_x_m, t[masked_indices], label_embs_list[i]) + for i, (proj_x_m, t) in enumerate(zip(proj_x_m_list, target_list)) + ] + else: + logit_m_list = [None for _ in target_list] + + if not self.skip_nomask: + nomask_indices = torch.logical_and(~padding_mask, ~mask_indices) + proj_x_u = self.final_proj(x[nomask_indices]) + if self.untie_final_proj: + proj_x_u_list = proj_x_u.chunk(len(target_list), dim=-1) + else: + proj_x_u_list = [proj_x_u for _ in 
range(len(target_list))] + + logit_u_list = [ + compute_pred(proj_x_u, t[nomask_indices], label_embs_list[i]) + for i, (proj_x_u, t) in enumerate(zip(proj_x_u_list, target_list)) + ] + else: + logit_u_list = [None for _ in target_list] + + result = { + "logit_m_list": logit_m_list, + "logit_u_list": logit_u_list, + "padding_mask": padding_mask, + "features_pen": features_pen, + } + if self.add_decoder: + encoder_out = { + "encoder_out": [x.transpose(0, 1)], # T x B x C + "encoder_padding_mask": [padding_mask], # B x T + } + assert prev_output_tokens is not None + decoder_out = self.decoder( + prev_output_tokens=prev_output_tokens, encoder_out=encoder_out + ) + result['decoder_out'] = decoder_out + return result + + def forward_torchscript(self, net_input: Dict[str, Tensor]): + """A TorchScript-compatible version of forward. + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. + """ + res = self.forward( + net_input["source"], + padding_mask=net_input["padding_mask"], + mask=False, + features_only=True + ) + + encoder_out = { + "encoder_out": [res["x"].transpose(0, 1)], # T x B x C + "encoder_padding_mask": [res["padding_mask"]], # B x T + } + return encoder_out + + def extract_features( + self, + source: torch.Tensor, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = False, + ret_conv: bool = False, + output_layer: Optional[int] = None, + prev_output_tokens: Optional[torch.Tensor] = None, + ft: bool = True, + ) -> Tuple[torch.Tensor, torch.Tensor]: + with torch.no_grad() if not ft else contextlib.ExitStack(): + res = self.forward( + source, + padding_mask=padding_mask, + mask=mask, + features_only=True, + output_layer=output_layer, + ) + + feature = res["features"] if ret_conv else res["x"] + if self.add_decoder: + encoder_out = { + "encoder_out": [feature.transpose(0, 1)], # T x B x C + "encoder_padding_mask": [res["padding_mask"]], # B x T + } + assert prev_output_tokens is not None + decoder_out = self.decoder( + prev_output_tokens=prev_output_tokens, + encoder_out=encoder_out, + ) + else: + decoder_out = None + return feature, res["padding_mask"], decoder_out diff --git a/SpeechT5/Speech2C/speech2c/models/speech2c_asr.py b/SpeechT5/Speech2C/speech2c/models/speech2c_asr.py new file mode 100644 index 0000000000000000000000000000000000000000..9bf8aed97d97f1fd352a884f10173c11043f6a92 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/models/speech2c_asr.py @@ -0,0 +1,276 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +from argparse import Namespace +from omegaconf import II + +import torch.nn as nn +from dataclasses import dataclass, field +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models import BaseFairseqModel, FairseqEncoder, register_model +from fairseq.models.hubert.hubert_asr import HubertAsrConfig, Linear +from fairseq.tasks import FairseqTask + + +@dataclass +class Speech2cAsrConfig(HubertAsrConfig): + # for decoder + decoder_layerdrop: float = field( + default=0.0, + metadata={"help": "probability of 
dropping a decoder layer in hubert"}, + ) + + add_decoder: bool = II("task.add_decoder") + +@dataclass +class Speech2cCtcConfig(Speech2cAsrConfig): + pass + + +@register_model("speech2c_ctc", dataclass=Speech2cCtcConfig) +class Speech2cCtc(BaseFairseqModel): + def __init__(self, cfg: Speech2cCtcConfig, w2v_encoder: BaseFairseqModel): + super().__init__() + self.cfg = cfg + self.w2v_encoder = w2v_encoder + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: Speech2cCtcConfig, task: FairseqTask): + """Build a new model instance.""" + w2v_encoder = Speech2cEncoder(cfg, task.target_dictionary) + return cls(cfg, w2v_encoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + if "encoder_out" not in net_output: + return self.w2v_encoder.get_normalized_probs_decoder(net_output, log_probs, sample) + + if "encoder_out_for_ctc" in net_output: + logits = net_output["encoder_out_for_ctc"] + else: + logits = net_output["encoder_out"] + + if isinstance(logits, list): + logits = logits[0] + + if log_probs: + return utils.log_softmax(logits.float(), dim=-1) + else: + return utils.softmax(logits.float(), dim=-1) + + def get_logits(self, net_output): + logits = net_output["encoder_out"] + padding = net_output["encoder_padding_mask"] + if padding is not None and padding.any(): + padding = padding.T + logits[padding][..., 0] = 0 + logits[padding][..., 1:] = float("-inf") + + return logits + + def forward(self, **kwargs): + x = self.w2v_encoder(**kwargs) + return x + + @property + def encoder(self): + return self.w2v_encoder + + def reorder_encoder_out(self, encoder_out, new_order): + return self.encoder.reorder_encoder_out(encoder_out, new_order) + + @property + def decoder(self): + return self.w2v_encoder.w2v_model.decoder + + +class Speech2cEncoder(FairseqEncoder): + def __init__(self, cfg: Speech2cAsrConfig, tgt_dict=None): + self.apply_mask = cfg.apply_mask + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "decoder_layerdrop": cfg.decoder_layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + "decoder_dict_size": len(tgt_dict) if cfg.add_decoder else -1, + } + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + assert cfg.normalize == w2v_args.task.normalize, ( + "Fine-tuning works best when data normalization is the same. 
" + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + w2v_args.task.data = cfg.data + w2v_args.task.add_decoder = cfg.add_decoder + task = tasks.setup_task(w2v_args.task) + if state is not None and "task_state" in state: + # This will load the stored "dictionaries" object + task.load_state_dict(state["task_state"]) + model = task.build_model(w2v_args.model) + + if state is not None and not cfg.no_pretrained_weights: + if "decoder.embed_tokens.weight" in state["model"]: + del state["model"]["decoder.embed_tokens.weight"] + if "decoder.output_projection.weight" in state["model"]: + del state["model"]["decoder.output_projection.weight"] + # set strict=False because we omit some modules + model.load_state_dict(state["model"], strict=False) + + model.remove_pretraining_modules() + + super().__init__(task.source_dictionary) + + d = model.mask_emb.size(0) + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.num_updates = 0 + + if tgt_dict is not None: + self.proj = Linear(d, len(tgt_dict)) + elif getattr(cfg, "decoder_embed_dim", d) != d: + self.proj = Linear(d, cfg.decoder_embed_dim) + else: + self.proj = None + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, source, padding_mask, prev_output_tokens=None, tbc=True, **kwargs): + + ft = self.freeze_finetune_updates <= self.num_updates + w2v_args = { + "source": source, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + "prev_output_tokens": prev_output_tokens, + "ft": ft, + } + + x, padding_mask, decoder_out = self.w2v_model.extract_features(**w2v_args) + + if tbc: + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + x = self.final_dropout(x) + + if self.proj: + x = self.proj(x) + + return { + "encoder_out": x, # T x B x C + "encoder_padding_mask": padding_mask, # B x T + "padding_mask": padding_mask, + "decoder_out": decoder_out, + } + + def get_normalized_probs_decoder(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (B, T, D) tensor + return self.w2v_model.get_normalized_probs(net_output, log_probs, sample) + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + if isinstance(encoder_out["encoder_out"], list): + encoder_out["encoder_out"] = ( + [] if len(encoder_out["encoder_out"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out"]] + ) + else: + encoder_out["encoder_out"] = encoder_out[ + "encoder_out" + ].index_select(1, new_order) + if encoder_out["encoder_padding_mask"] is not None: + if isinstance(encoder_out["encoder_padding_mask"], list): + encoder_out["encoder_padding_mask"] = ( + [] if len(encoder_out["encoder_padding_mask"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"]] + ) + else: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(0, new_order) + if "decoder_out" in encoder_out and encoder_out["decoder_out"] is not None: + if isinstance(encoder_out["decoder_out"], list): + encoder_out["decoder_out"] = ( + [] if len(encoder_out["decoder_out"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["decoder_out"]] + ) + else: + encoder_out["decoder_out"] = encoder_out[ + "decoder_out" + ].index_select(0, new_order) + if "encoder_out_for_ctc" 
in encoder_out and encoder_out["encoder_out_for_ctc"] is not None: + if isinstance(encoder_out["encoder_out_for_ctc"], list): + encoder_out["encoder_out_for_ctc"] = ( + [] if len(encoder_out["encoder_out_for_ctc"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out_for_ctc"]] + ) + else: + encoder_out["encoder_out_for_ctc"] = encoder_out[ + "encoder_out_for_ctc" + ].index_select(1, new_order) + + return encoder_out + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. + """ + encoder_out = self.w2v_model.forward_torchscript(net_input) + + assert self.proj is not None + encoder_out['encoder_out_for_ctc'] = [self.proj(encoder_out['encoder_out'][0])] + + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + diff --git a/SpeechT5/Speech2C/speech2c/models/t5_transformer_lm.py b/SpeechT5/Speech2C/speech2c/models/t5_transformer_lm.py new file mode 100644 index 0000000000000000000000000000000000000000..3d16a2df00b692114f8d84d254cf486d09e1137b --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/models/t5_transformer_lm.py @@ -0,0 +1,25 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +from fairseq.models import ( + register_model_architecture, +) +from fairseq.models.transformer_lm import base_lm_architecture + + +@register_model_architecture(model_name="transformer_lm", arch_name="transformer_lm_t5") +def transformer_lm_t5(args): + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1280) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 6144) + args.decoder_layers = getattr(args, "decoder_layers", 20) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_fn = getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) diff --git a/SpeechT5/Speech2C/speech2c/squence_generator.py b/SpeechT5/Speech2C/speech2c/squence_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..e51e8021fe9e4e48619340412df012937db54198 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/squence_generator.py @@ -0,0 +1,1028 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import math +from typing import Dict, List, Optional +import sys + +import torch +import torch.nn as nn +from fairseq import search, utils +from fairseq.data import data_utils +from 
fairseq.models import FairseqIncrementalDecoder +from torch import Tensor +from fairseq.ngram_repeat_block import NGramRepeatBlock +from speech2c.models.modules.ctc_prefix_score import CTCPrefixScore +import numpy + + +CTC_SCORING_RATIO = 7.0 + +class SequenceGenerator(nn.Module): + def __init__( + self, + models, + tgt_dict, + beam_size=1, + max_len_a=0, + max_len_b=200, + max_len=0, + min_len=1, + normalize_scores=True, + len_penalty=1.0, + unk_penalty=0.0, + temperature=1.0, + match_source_len=False, + no_repeat_ngram_size=0, + search_strategy=None, + eos=None, + symbols_to_strip_from_output=None, + lm_model=None, + lm_weight=1.0, + ctc_weight=0.0, + ): + """Generates translations of a given source sentence. + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models, + currently support fairseq.models.TransformerModel for scripting + beam_size (int, optional): beam width (default: 1) + max_len_a/b (int, optional): generate sequences of maximum length + ax + b, where x is the source length + max_len (int, optional): the maximum length of the generated output + (not including end-of-sentence) + min_len (int, optional): the minimum length of the generated output + (not including end-of-sentence) + normalize_scores (bool, optional): normalize scores by the length + of the output (default: True) + len_penalty (float, optional): length penalty, where <1.0 favors + shorter, >1.0 favors longer sentences (default: 1.0) + unk_penalty (float, optional): unknown word penalty, where <0 + produces more unks, >0 produces fewer (default: 0.0) + temperature (float, optional): temperature, where values + >1.0 produce more uniform samples and values <1.0 produce + sharper samples (default: 1.0) + match_source_len (bool, optional): outputs should match the source + length (default: False) + """ + super().__init__() + if isinstance(models, EnsembleModel): + self.model = models + else: + self.model = EnsembleModel(models) + self.tgt_dict = tgt_dict + self.pad = tgt_dict.pad() + self.unk = tgt_dict.unk() + self.eos = tgt_dict.eos() if eos is None else eos + self.blank = self.tgt_dict.index("") + self.symbols_to_strip_from_output = ( + symbols_to_strip_from_output.union({self.eos}) + if symbols_to_strip_from_output is not None + else {self.eos} + ) + self.vocab_size = len(tgt_dict) + self.beam_size = beam_size + # the max beam size is the dictionary size - 1, since we never select pad + self.beam_size = min(beam_size, self.vocab_size - 1) + self.max_len_a = max_len_a + self.max_len_b = max_len_b + self.min_len = min_len + self.max_len = max_len or self.model.max_decoder_positions() + + self.normalize_scores = normalize_scores + self.len_penalty = len_penalty + self.unk_penalty = unk_penalty + self.temperature = temperature + self.match_source_len = match_source_len + + if no_repeat_ngram_size > 0: + self.repeat_ngram_blocker = NGramRepeatBlock(no_repeat_ngram_size) + else: + self.repeat_ngram_blocker = None + + assert temperature > 0, "--temperature must be greater than 0" + + self.search = ( + search.BeamSearch(tgt_dict) if search_strategy is None else search_strategy + ) + # We only need to set src_lengths in LengthConstrainedBeamSearch. + # As a module attribute, setting it would break in multithread + # settings when the model is shared. 
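+        # Note: the per-step score combined in _generate is
+        #   (1 - ctc_weight) * log p_attention + ctc_weight * log p_ctc,
+        # with lm_weight * log p_lm added on top when an external LM is provided.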
+ self.should_set_src_lengths = ( + hasattr(self.search, "needs_src_lengths") and self.search.needs_src_lengths + ) + + self.model.eval() + + self.lm_model = lm_model + self.lm_weight = lm_weight + self.ctc_weight = ctc_weight + if self.lm_model is not None: + self.lm_model.eval() + + def cuda(self): + self.model.cuda() + return self + + @torch.no_grad() + def forward( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + """Generate a batch of translations. + Args: + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, prefix_tokens, bos_token=bos_token) + + # TODO(myleott): unused, deprecate after pytorch-translate migration + def generate_batched_itr(self, data_itr, beam_size=None, cuda=False, timer=None): + """Iterate over a batched dataset and yield individual translations. + Args: + cuda (bool, optional): use GPU for generation + timer (StopwatchMeter, optional): time generations + """ + for sample in data_itr: + s = utils.move_to_cuda(sample) if cuda else sample + if "net_input" not in s: + continue + input = s["net_input"] + # model.forward normally channels prev_output_tokens into the decoder + # separately, but SequenceGenerator directly calls model.encoder + encoder_input = { + k: v for k, v in input.items() if k != "prev_output_tokens" + } + if timer is not None: + timer.start() + with torch.no_grad(): + hypos = self.generate(encoder_input) + if timer is not None: + timer.stop(sum(len(h[0]["tokens"]) for h in hypos)) + for i, id in enumerate(s["id"].data): + # remove padding + src = utils.strip_pad(input["src_tokens"].data[i, :], self.pad) + ref = ( + utils.strip_pad(s["target"].data[i, :], self.pad) + if s["target"] is not None + else None + ) + yield id, src, ref, hypos[i] + + @torch.no_grad() + def generate(self, models, sample: Dict[str, Dict[str, Tensor]], **kwargs) -> List[List[Dict[str, Tensor]]]: + """Generate translations. Match the api of other fairseq generators. 
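+        The `models` argument is accepted only to match the fairseq generator API and is ignored; decoding uses the ensemble passed to __init__.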
+ Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + constraints (torch.LongTensor, optional): force decoder to include + the list of constraints + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, **kwargs) + + def _generate( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + constraints: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + incremental_states = torch.jit.annotate( + List[Dict[str, Dict[str, Optional[Tensor]]]], + [ + torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {}) + for i in range(self.model.models_size) + ], + ) + net_input = sample["net_input"] + + if "src_tokens" in net_input: + src_tokens = net_input["src_tokens"] + # length of the source text being the character length except EndOfSentence and pad + src_lengths = ( + (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1) + ) + elif "source" in net_input: + src_tokens = net_input["source"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + elif "features" in net_input: + src_tokens = net_input["features"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + else: + raise Exception("expected src_tokens or source in net input. input keys: " + str(net_input.keys())) + + # bsz: total number of sentences in beam + # Note that src_tokens may have more than 2 dimensions (i.e. audio features) + bsz, src_len = src_tokens.size()[:2] + beam_size = self.beam_size + + if constraints is not None and not self.search.supports_constraints: + raise NotImplementedError( + "Target-side constraints were provided, but search method doesn't support them" + ) + + # Initialize constraints, when active + self.search.init_constraints(constraints, beam_size) + + max_len: int = -1 + if self.match_source_len: + max_len = src_lengths.max().item() + else: + max_len = min( + int(self.max_len_a * src_len + self.max_len_b), + self.max_len - 1, + ) + assert ( + self.min_len <= max_len + ), "min_len cannot be larger than max_len, please adjust these!" + # compute the encoder output for each beam + with torch.autograd.profiler.record_function("EnsembleModel: forward_encoder"): + encoder_outs = self.model.forward_encoder(net_input) + + # Get CTC lprobs and prep ctc_scorer + if self.ctc_weight > 0: + ctc_lprobs = self.model.models[0].get_normalized_probs( + encoder_outs[0], log_probs=True + ).contiguous().transpose(0, 1) # (B, T, C) from the encoder + + hyp = {} + ctc_prefix_score = CTCPrefixScore(ctc_lprobs[0].detach().cpu().numpy(), self.blank, self.eos, numpy) + hyp["ctc_state_prev"] = ctc_prefix_score.initial_state() + hyp["ctc_score_prev"] = 0.0 + ctc_beam = min(ctc_lprobs.shape[-1], int(beam_size * CTC_SCORING_RATIO)) + ctc_hyps = {str(self.eos): hyp} + + # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores + new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1) + new_order = new_order.to(src_tokens.device).long() + encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order) + # ensure encoder_outs is a List. 
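+        # From this point on, tensors are laid out with bsz * beam_size rows, i.e.
+        # each row corresponds to one (sentence, beam) pair.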
+ assert encoder_outs is not None + + # initialize buffers + scores = ( + torch.zeros(bsz * beam_size, max_len + 1).to(src_tokens).float() + ) # +1 for eos; pad is never chosen for scoring + tokens = ( + torch.zeros(bsz * beam_size, max_len + 2) + .to(src_tokens) + .long() + .fill_(self.pad) + ) # +2 for eos and pad + tokens[:, 0] = self.eos if bos_token is None else bos_token + attn: Optional[Tensor] = None + + # A list that indicates candidates that should be ignored. + # For example, suppose we're sampling and have already finalized 2/5 + # samples. Then cands_to_ignore would mark 2 positions as being ignored, + # so that we only finalize the remaining 3 samples. + cands_to_ignore = ( + torch.zeros(bsz, beam_size).to(src_tokens).eq(-1) + ) # forward and backward-compatible False mask + + # list of completed sentences + finalized = torch.jit.annotate( + List[List[Dict[str, Tensor]]], + [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)], + ) # contains lists of dictionaries of infomation about the hypothesis being finalized at each step + + # a boolean array indicating if the sentence at the index is finished or not + finished = [False for i in range(bsz)] + num_remaining_sent = bsz # number of sentences remaining + + # number of candidate hypos per step + cand_size = 2 * beam_size # 2 x beam size in case half are EOS + + # offset arrays for converting between different indexing schemes + bbsz_offsets = ( + (torch.arange(0, bsz) * beam_size) + .unsqueeze(1) + .type_as(tokens) + .to(src_tokens.device) + ) + cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_tokens.device) + + reorder_state: Optional[Tensor] = None + batch_idxs: Optional[Tensor] = None + + original_batch_idxs: Optional[Tensor] = None + if "id" in sample and isinstance(sample["id"], Tensor): + original_batch_idxs = sample["id"] + else: + original_batch_idxs = torch.arange(0, bsz).type_as(tokens) + + for step in range(max_len + 1): # one extra step for EOS marker + # reorder decoder internal states based on the prev choice of beams + if reorder_state is not None: + if batch_idxs is not None: + # update beam indices to take into account removed sentences + corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as( + batch_idxs + ) + reorder_state.view(-1, beam_size).add_( + corr.unsqueeze(-1) * beam_size + ) + original_batch_idxs = original_batch_idxs[batch_idxs] + self.model.reorder_incremental_state(incremental_states, reorder_state) + encoder_outs = self.model.reorder_encoder_out( + encoder_outs, reorder_state + ) + with torch.autograd.profiler.record_function("EnsembleModel: forward_decoder"): + lprobs, avg_attn_scores = self.model.forward_decoder( + tokens[:, : step + 1], + encoder_outs, + incremental_states, + self.temperature, + ) + + if self.ctc_weight > 0 and step != 0: + # lprobs[:, self.blank] = -math.inf # never select blank + ctc_lprobs = lprobs.clone() + ctc_lprobs[:, self.blank] = -math.inf # never select blank + _, local_best_ids = torch.topk(ctc_lprobs, ctc_beam, dim=-1) + for b in range(tokens.size(0)): + hyp_key = " ".join(str(x) for x in tokens[b, : step + 1].tolist()) + ctc_scores, ctc_states = ctc_prefix_score( + tokens[b, : step + 1].cpu(), local_best_ids[b].cpu(), ctc_hyps[hyp_key]["ctc_state_prev"] + ) + lprobs[b] = lprobs[b] + lprobs[b, local_best_ids[b]] = (1 - self.ctc_weight) * (lprobs[b, local_best_ids[b]]) + self.ctc_weight * torch.from_numpy( + ctc_scores - ctc_hyps[hyp_key]["ctc_score_prev"] + ).to(device="cuda") + for j in range(len(local_best_ids[b])): + 
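+                        # Cache the CTC prefix score/state of every candidate extension,
+                        # keyed by its space-joined token history, so the next decoding
+                        # step can resume CTC prefix scoring incrementally.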
ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())] = {} + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_score_prev"] = ctc_scores[j] + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_state_prev"] = ctc_states[j] + + elif self.ctc_weight > 0 and step == 0: + ctc_lprobs = lprobs.clone() + ctc_lprobs[:, self.blank] = -math.inf # never select blank + _, local_best_ids = torch.topk(ctc_lprobs, ctc_beam, dim=-1) + for b in range(tokens.size(0)): + hyp_key = " ".join(str(x) for x in tokens[b, : step + 1].tolist()) + ctc_scores, ctc_states = ctc_prefix_score( + tokens[b, : step + 1].cpu(), local_best_ids[b].cpu(), ctc_hyps[hyp_key]["ctc_state_prev"] + ) + lprobs[b] = lprobs[b] + lprobs[b, local_best_ids[b]] = (1 - self.ctc_weight) * (lprobs[b, local_best_ids[b]]) + self.ctc_weight * torch.from_numpy( + ctc_scores - ctc_hyps[hyp_key]["ctc_score_prev"] + ).to(device="cuda") + for j in range(len(local_best_ids[b])): + if b == 0: + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())] = {} + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_score_prev"] = ctc_scores[j] + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_state_prev"] = ctc_states[j] + + if self.lm_model is not None: + lm_out = self.lm_model(tokens[:, : step + 1]) + probs = self.lm_model.get_normalized_probs( + lm_out, log_probs=True, sample=None + ) + probs = probs[:, -1, :] * self.lm_weight + lprobs += probs + # handle prefix tokens (possibly with different lengths) + if ( + prefix_tokens is not None + and step < prefix_tokens.size(1) + and step < max_len + ): + lprobs, tokens, scores = self._prefix_tokens( + step, lprobs, scores, tokens, prefix_tokens, beam_size + ) + elif step < self.min_len: + # minimum length constraint (does not apply if using prefix_tokens) + lprobs[:, self.eos] = -math.inf + + lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs) + + lprobs[:, self.pad] = -math.inf # never select pad + lprobs[:, self.unk] -= self.unk_penalty # apply unk penalty + lprobs[:, self.blank] = -math.inf # never select blank + + # handle max length constraint + if step >= max_len: + lprobs[:, : self.eos] = -math.inf + lprobs[:, self.eos + 1 :] = -math.inf + + # Record attention scores, only support avg_attn_scores is a Tensor + if avg_attn_scores is not None: + if attn is None: + attn = torch.empty( + bsz * beam_size, avg_attn_scores.size(1), max_len + 2 + ).to(scores) + attn[:, :, step + 1].copy_(avg_attn_scores) + + scores = scores.type_as(lprobs) + eos_bbsz_idx = torch.empty(0).to( + tokens + ) # indices of hypothesis ending with eos (finished sentences) + eos_scores = torch.empty(0).to( + scores + ) # scores of hypothesis ending with eos (finished sentences) + + if self.should_set_src_lengths: + self.search.set_src_lengths(src_lengths) + + if self.repeat_ngram_blocker is not None: + lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz, beam_size, step) + + # Shape: (batch, cand_size) + cand_scores, cand_indices, cand_beams = self.search.step( + step, + lprobs.view(bsz, -1, self.vocab_size), + scores.view(bsz, beam_size, -1)[:, :, :step], + tokens[:, : step + 1], + original_batch_idxs, + ) + + # cand_bbsz_idx contains beam indices for the top candidate + # hypotheses, with a range of values: [0, bsz*beam_size), + # and dimensions: [bsz, cand_size] + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + + # finalize hypotheses that end in eos + # Shape of eos_mask: (batch size, beam size) + eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf) + 
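+            # Never finalize candidates whose beam slot has already been marked as ignored.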
eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask) + + # only consider eos when it's among the top beam_size indices + # Now we know what beam item(s) to finish + # Shape: 1d list of absolute-numbered + eos_bbsz_idx = torch.masked_select( + cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents: List[int] = [] + if eos_bbsz_idx.numel() > 0: + eos_scores = torch.masked_select( + cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents = self.finalize_hypos( + step, + eos_bbsz_idx, + eos_scores, + tokens, + scores, + finalized, + finished, + beam_size, + attn, + src_lengths, + max_len, + ) + num_remaining_sent -= len(finalized_sents) + + assert num_remaining_sent >= 0 + if num_remaining_sent == 0: + break + if self.search.stop_on_max_len and step >= max_len: + break + assert step < max_len, f"{step} < {max_len}" + + # Remove finalized sentences (ones for which {beam_size} + # finished hypotheses have been generated) from the batch. + if len(finalized_sents) > 0: + new_bsz = bsz - len(finalized_sents) + + # construct batch_idxs which holds indices of batches to keep for the next pass + batch_mask = torch.ones( + bsz, dtype=torch.bool, device=cand_indices.device + ) + batch_mask[finalized_sents] = False + # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it + batch_idxs = torch.arange( + bsz, device=cand_indices.device + ).masked_select(batch_mask) + + # Choose the subset of the hypothesized constraints that will continue + self.search.prune_sentences(batch_idxs) + + eos_mask = eos_mask[batch_idxs] + cand_beams = cand_beams[batch_idxs] + bbsz_offsets.resize_(new_bsz, 1) + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + cand_scores = cand_scores[batch_idxs] + cand_indices = cand_indices[batch_idxs] + + if prefix_tokens is not None: + prefix_tokens = prefix_tokens[batch_idxs] + src_lengths = src_lengths[batch_idxs] + cands_to_ignore = cands_to_ignore[batch_idxs] + + scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + if attn is not None: + attn = attn.view(bsz, -1)[batch_idxs].view( + new_bsz * beam_size, attn.size(1), -1 + ) + bsz = new_bsz + else: + batch_idxs = None + + # Set active_mask so that values > cand_size indicate eos hypos + # and values < cand_size indicate candidate active hypos. + # After, the min values per row are the top candidate active hypos + + # Rewrite the operator since the element wise or is not supported in torchscript. + + eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size])) + active_mask = torch.add( + eos_mask.type_as(cand_offsets) * cand_size, + cand_offsets[: eos_mask.size(1)], + ) + + # get the top beam_size active hypotheses, which are just + # the hypos with the smallest values in active_mask. + # {active_hypos} indicates which {beam_size} hypotheses + # from the list of {2 * beam_size} candidates were + # selected. Shapes: (batch size, beam size) + new_cands_to_ignore, active_hypos = torch.topk( + active_mask, k=beam_size, dim=1, largest=False + ) + + # update cands_to_ignore to ignore any finalized hypos. + cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size] + # Make sure there is at least one active item for each sentence in the batch. 
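+            # (active_mask entries >= cand_size come from EOS or ignored slots, which is
+            # why ge(cand_size) above marks beams that are no longer active.)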
+ assert (~cands_to_ignore).any(dim=1).all() + + # update cands_to_ignore to ignore any finalized hypos + + # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam + # can be selected more than once). + active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos) + active_scores = torch.gather(cand_scores, dim=1, index=active_hypos) + + active_bbsz_idx = active_bbsz_idx.view(-1) + active_scores = active_scores.view(-1) + + # copy tokens and scores for active hypotheses + + # Set the tokens for each beam (can select the same row more than once) + tokens[:, : step + 1] = torch.index_select( + tokens[:, : step + 1], dim=0, index=active_bbsz_idx + ) + # Select the next token for each of them + tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather( + cand_indices, dim=1, index=active_hypos + ) + if step > 0: + scores[:, :step] = torch.index_select( + scores[:, :step], dim=0, index=active_bbsz_idx + ) + scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather( + cand_scores, dim=1, index=active_hypos + ) + + # Update constraints based on which candidates were selected for the next beam + self.search.update_constraints(active_hypos) + + # copy attention for active hypotheses + if attn is not None: + attn[:, :, : step + 2] = torch.index_select( + attn[:, :, : step + 2], dim=0, index=active_bbsz_idx + ) + + # reorder incremental state in decoder + reorder_state = active_bbsz_idx + + # sort by score descending + for sent in range(len(finalized)): + scores = torch.tensor( + [float(elem["score"].item()) for elem in finalized[sent]] + ) + _, sorted_scores_indices = torch.sort(scores, descending=True) + finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices] + finalized[sent] = torch.jit.annotate( + List[Dict[str, Tensor]], finalized[sent] + ) + return finalized + + def _prefix_tokens( + self, step: int, lprobs, scores, tokens, prefix_tokens, beam_size: int + ): + """Handle prefix tokens""" + prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat(1, beam_size).view(-1) + prefix_lprobs = lprobs.gather(-1, prefix_toks.unsqueeze(-1)) + prefix_mask = prefix_toks.ne(self.pad) + lprobs[prefix_mask] = torch.min(prefix_lprobs) - 1 + lprobs[prefix_mask] = lprobs[prefix_mask].scatter( + -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_lprobs[prefix_mask] + ) + # if prefix includes eos, then we should make sure tokens and + # scores are the same across all beams + eos_mask = prefix_toks.eq(self.eos) + if eos_mask.any(): + # validate that the first beam matches the prefix + first_beam = tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[ + :, 0, 1 : step + 1 + ] + eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0] + target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step] + assert (first_beam == target_prefix).all() + + # copy tokens, scores and lprobs from the first beam to all beams + tokens = self.replicate_first_beam(tokens, eos_mask_batch_dim, beam_size) + scores = self.replicate_first_beam(scores, eos_mask_batch_dim, beam_size) + lprobs = self.replicate_first_beam(lprobs, eos_mask_batch_dim, beam_size) + return lprobs, tokens, scores + + def replicate_first_beam(self, tensor, mask, beam_size: int): + tensor = tensor.view(-1, beam_size, tensor.size(-1)) + tensor[mask] = tensor[mask][:, :1, :] + return tensor.view(-1, tensor.size(-1)) + + def finalize_hypos( + self, + step: int, + bbsz_idx, + eos_scores, + tokens, + scores, + finalized: List[List[Dict[str, Tensor]]], + finished: List[bool], + beam_size: int, + attn: 
Optional[Tensor], + src_lengths, + max_len: int, + ): + """Finalize hypothesis, store finalized information in `finalized`, and change `finished` accordingly. + A sentence is finalized when {beam_size} finished items have been collected for it. + Returns number of sentences (not beam items) being finalized. + These will be removed from the batch and not processed further. + Args: + bbsz_idx (Tensor): + """ + assert bbsz_idx.numel() == eos_scores.numel() + + # clone relevant token and attention tensors. + # tokens is (batch * beam, max_len). So the index_select + # gets the newly EOS rows, then selects cols 1..{step + 2} + tokens_clone = tokens.index_select(0, bbsz_idx)[ + :, 1 : step + 2 + ] # skip the first index, which is EOS + + tokens_clone[:, step] = self.eos + attn_clone = ( + attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2] + if attn is not None + else None + ) + + # compute scores per token position + pos_scores = scores.index_select(0, bbsz_idx)[:, : step + 1] + pos_scores[:, step] = eos_scores + # convert from cumulative to per-position scores + pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] + + # normalize sentence-level scores + if self.normalize_scores: + eos_scores /= (step + 1) ** self.len_penalty + + # cum_unfin records which sentences in the batch are finished. + # It helps match indexing between (a) the original sentences + # in the batch and (b) the current, possibly-reduced set of + # sentences. + cum_unfin: List[int] = [] + prev = 0 + for f in finished: + if f: + prev += 1 + else: + cum_unfin.append(prev) + cum_fin_tensor = torch.tensor(cum_unfin, dtype=torch.int).to(bbsz_idx) + + unfin_idx = bbsz_idx // beam_size + sent = unfin_idx + torch.index_select(cum_fin_tensor, 0, unfin_idx) + + # Create a set of "{sent}{unfin_idx}", where + # "unfin_idx" is the index in the current (possibly reduced) + # list of sentences, and "sent" is the index in the original, + # unreduced batch + # For every finished beam item + # sentence index in the current (possibly reduced) batch + seen = (sent << 32) + unfin_idx + unique_seen: List[int] = torch.unique(seen).tolist() + + if self.match_source_len: + condition = step > torch.index_select(src_lengths, 0, unfin_idx) + eos_scores = torch.where(condition, torch.tensor(-math.inf), eos_scores) + sent_list: List[int] = sent.tolist() + for i in range(bbsz_idx.size()[0]): + # An input sentence (among those in a batch) is finished when + # beam_size hypotheses have been collected for it + if len(finalized[sent_list[i]]) < beam_size: + if attn_clone is not None: + # remove padding tokens from attn scores + hypo_attn = attn_clone[i] + else: + hypo_attn = torch.empty(0) + + finalized[sent_list[i]].append( + { + "tokens": tokens_clone[i], + "score": eos_scores[i], + "attention": hypo_attn, # src_len x tgt_len + "alignment": torch.empty(0), + "positional_scores": pos_scores[i], + } + ) + + newly_finished: List[int] = [] + for unique_s in unique_seen: + # check termination conditions for this sentence + unique_sent: int = unique_s >> 32 + unique_unfin_idx: int = unique_s - (unique_sent << 32) + + if not finished[unique_sent] and self.is_finished( + step, unique_unfin_idx, max_len, len(finalized[unique_sent]), beam_size + ): + finished[unique_sent] = True + newly_finished.append(unique_unfin_idx) + + return newly_finished + + def is_finished( + self, + step: int, + unfin_idx: int, + max_len: int, + finalized_sent_len: int, + beam_size: int, + ): + """ + Check whether decoding for a sentence is finished, which + occurs when the list of 
finalized sentences has reached the + beam size, or when we reach the maximum length. + """ + assert finalized_sent_len <= beam_size + if finalized_sent_len == beam_size or step == max_len: + return True + return False + + +class EnsembleModel(nn.Module): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__() + self.models_size = len(models) + # method '__len__' is not supported in ModuleList for torch script + self.single_model = models[0] + self.models = nn.ModuleList(models) + + self.has_incremental: bool = False + if all( + hasattr(m, "decoder") and isinstance(m.decoder, FairseqIncrementalDecoder) + for m in models + ): + self.has_incremental = True + + def forward(self): + pass + + def has_encoder(self): + return hasattr(self.single_model, "encoder") + + def has_incremental_states(self): + return self.has_incremental + + def max_decoder_positions(self): + return min([m.max_decoder_positions() for m in self.models if hasattr(m, "max_decoder_positions")] + [sys.maxsize]) + + @torch.jit.export + def forward_encoder(self, net_input: Dict[str, Tensor]): + if not self.has_encoder(): + return None + return [model.encoder.forward_torchscript(net_input) for model in self.models] + + @torch.jit.export + def forward_decoder( + self, + tokens, + encoder_outs: List[Dict[str, List[Tensor]]], + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + temperature: float = 1.0, + ): + log_probs = [] + avg_attn: Optional[Tensor] = None + encoder_out: Optional[Dict[str, List[Tensor]]] = None + for i, model in enumerate(self.models): + if self.has_encoder(): + encoder_out = encoder_outs[i] + # decode each model + if self.has_incremental_states(): + decoder_out = model.decoder.forward( + tokens, + encoder_out=encoder_out, + incremental_state=incremental_states[i], + ) + else: + if hasattr(model, "decoder"): + decoder_out = model.decoder.forward(tokens, encoder_out=encoder_out) + else: + decoder_out = model.forward(tokens) + + attn: Optional[Tensor] = None + decoder_len = len(decoder_out) + if decoder_len > 1 and decoder_out[1] is not None: + if isinstance(decoder_out[1], Tensor): + attn = decoder_out[1] + else: + attn_holder = decoder_out[1]["attn"] + if isinstance(attn_holder, Tensor): + attn = attn_holder + elif attn_holder is not None: + attn = attn_holder[0] + if attn is not None: + attn = attn[:, -1, :] + + decoder_out_tuple = ( + decoder_out[0][:, -1:, :].div_(temperature), + None if decoder_len <= 1 else decoder_out[1], + ) + probs = model.get_normalized_probs( + decoder_out_tuple, log_probs=True, sample=None + ) + probs = probs[:, -1, :] + if self.models_size == 1: + return probs, attn + + log_probs.append(probs) + if attn is not None: + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + + avg_probs = torch.logsumexp(torch.stack(log_probs, dim=0), dim=0) - math.log( + self.models_size + ) + + if avg_attn is not None: + avg_attn.div_(self.models_size) + return avg_probs, avg_attn + + @torch.jit.export + def reorder_encoder_out( + self, encoder_outs: Optional[List[Dict[str, List[Tensor]]]], new_order + ): + """ + Reorder encoder output according to *new_order*. 
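+        Called whenever beams are re-ranked or finished sentences are dropped from the batch, so cached encoder states stay aligned with the surviving hypotheses.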
+ Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + Returns: + *encoder_out* rearranged according to *new_order* + """ + new_outs: List[Dict[str, List[Tensor]]] = [] + if not self.has_encoder(): + return new_outs + for i, model in enumerate(self.models): + assert encoder_outs is not None + new_outs.append( + model.encoder.reorder_encoder_out(encoder_outs[i], new_order) + ) + return new_outs + + @torch.jit.export + def reorder_incremental_state( + self, + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + new_order, + ): + if not self.has_incremental_states(): + return + for i, model in enumerate(self.models): + model.decoder.reorder_incremental_state_scripting( + incremental_states[i], new_order + ) + + +class SequenceGeneratorWithAlignment(SequenceGenerator): + def __init__( + self, models, tgt_dict, left_pad_target=False, print_alignment="hard", **kwargs + ): + """Generates translations of a given source sentence. + Produces alignments following "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + Args: + left_pad_target (bool, optional): Whether or not the + hypothesis should be left padded or not when they are + teacher forced for generating alignments. + """ + super().__init__(EnsembleModelWithAlignment(models), tgt_dict, **kwargs) + self.left_pad_target = left_pad_target + + if print_alignment == "hard": + self.extract_alignment = utils.extract_hard_alignment + elif print_alignment == "soft": + self.extract_alignment = utils.extract_soft_alignment + + @torch.no_grad() + def generate(self, models, sample, **kwargs): + finalized = super()._generate(sample, **kwargs) + + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + beam_size = self.beam_size + ( + src_tokens, + src_lengths, + prev_output_tokens, + tgt_tokens, + ) = self._prepare_batch_for_alignment(sample, finalized) + if any(getattr(m, "full_context_alignment", False) for m in self.model.models): + attn = self.model.forward_align(src_tokens, src_lengths, prev_output_tokens) + else: + attn = [ + finalized[i // beam_size][i % beam_size]["attention"].transpose(1, 0) + for i in range(bsz * beam_size) + ] + + if src_tokens.device != "cpu": + src_tokens = src_tokens.to("cpu") + tgt_tokens = tgt_tokens.to("cpu") + attn = [i.to("cpu") for i in attn] + + # Process the attn matrix to extract hard alignments. 
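+        # finalized is indexed as finalized[sentence][beam], hence the
+        # i // beam_size and i % beam_size indexing in the loop below.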
+ for i in range(bsz * beam_size): + alignment = self.extract_alignment( + attn[i], src_tokens[i], tgt_tokens[i], self.pad, self.eos + ) + finalized[i // beam_size][i % beam_size]["alignment"] = alignment + return finalized + + def _prepare_batch_for_alignment(self, sample, hypothesis): + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + src_tokens = ( + src_tokens[:, None, :] + .expand(-1, self.beam_size, -1) + .contiguous() + .view(bsz * self.beam_size, -1) + ) + src_lengths = sample["net_input"]["src_lengths"] + src_lengths = ( + src_lengths[:, None] + .expand(-1, self.beam_size) + .contiguous() + .view(bsz * self.beam_size) + ) + prev_output_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=True, + ) + tgt_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=False, + ) + return src_tokens, src_lengths, prev_output_tokens, tgt_tokens + + +class EnsembleModelWithAlignment(EnsembleModel): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__(models) + + def forward_align(self, src_tokens, src_lengths, prev_output_tokens): + avg_attn = None + for model in self.models: + decoder_out = model(src_tokens, src_lengths, prev_output_tokens) + attn = decoder_out[1]["attn"][0] + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + if len(self.models) > 1: + avg_attn.div_(len(self.models)) + return avg_attn diff --git a/SpeechT5/Speech2C/speech2c/tasks/speech2c_pretraining.py b/SpeechT5/Speech2C/speech2c/tasks/speech2c_pretraining.py new file mode 100644 index 0000000000000000000000000000000000000000..de275630bb08ad3ffae5120eee93d0c75d9ed8b0 --- /dev/null +++ b/SpeechT5/Speech2C/speech2c/tasks/speech2c_pretraining.py @@ -0,0 +1,91 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import logging + +from dataclasses import dataclass, field +from fairseq.data import Dictionary +from fairseq.tasks import register_task +from fairseq.tasks.hubert_pretraining import HubertPretrainingConfig, HubertPretrainingTask, LabelEncoder +from speech2c.data.speech2c_dataset import Speech2cDataset + +logger = logging.getLogger(__name__) + + +@dataclass +class Speech2cPretrainingConfig(HubertPretrainingConfig): + add_decoder: bool = field( + default=False, + metadata={"help": "whether to add decoder for CE Loss on code"}, + ) + + # For inference + ctc_weight: float = field( + default=0.0, + metadata={"help": "ctc weight during inference"}, + ) + + +@register_task("speech2c_pretraining", dataclass=Speech2cPretrainingConfig) +class Speech2cPretrainingTask(HubertPretrainingTask): + + cfg: Speech2cPretrainingConfig + + def load_dictionaries(self): + label_dir = self.cfg.data if self.cfg.label_dir is None else self.cfg.label_dir + dictionaries = [Dictionary.load(f"{label_dir}/dict.{label}.txt") for label in self.cfg.labels] + return dictionaries[0] if 
self.cfg.fine_tuning else dictionaries + + def load_dataset(self, split: str, **kwargs) -> None: + manifest = f"{self.cfg.data}/{split}.tsv" + dicts = [self.target_dictionary] if self.cfg.fine_tuning else self.dictionaries + pad_list = [dict.pad() for dict in dicts] + eos_list = [dict.eos() for dict in dicts] + procs = [LabelEncoder(dict) for dict in dicts] + paths = [ + f"{self.get_label_dir()}/{split}.{l}" for l in self.cfg.labels + ] + + # hubert v1: pad_audio=True, random_crop=False; + self.datasets[split] = Speech2cDataset( + manifest, + sample_rate=self.cfg.sample_rate, + label_paths=paths, + label_rates=self.cfg.label_rate, + pad_list=pad_list, + eos_list=eos_list, + label_processors=procs, + max_keep_sample_size=self.cfg.max_keep_size, + min_keep_sample_size=self.cfg.min_sample_size, + max_sample_size=self.cfg.max_sample_size, + pad_audio=self.cfg.pad_audio, + normalize=self.cfg.normalize, + store_labels=False, + random_crop=self.cfg.random_crop, + single_target=self.cfg.single_target, + tgt_dict=dicts[0], + add_decoder=self.cfg.add_decoder, + fine_tuning=self.cfg.fine_tuning, + ) + + def build_generator( + self, + models, + args, + seq_gen_cls=None, + extra_gen_cls_kwargs=None, + ): + from speech2c.squence_generator import SequenceGenerator + extra_gen_cls_kwargs = { + "ctc_weight": self.cfg.ctc_weight, + **extra_gen_cls_kwargs + } + return super().build_generator( + models, args, seq_gen_cls=SequenceGenerator, extra_gen_cls_kwargs=extra_gen_cls_kwargs + ) diff --git a/SpeechT5/Speech2S/README.md b/SpeechT5/Speech2S/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fc827e237111d872dac19ce407b8d11e52a5ee44 --- /dev/null +++ b/SpeechT5/Speech2S/README.md @@ -0,0 +1,64 @@ +# Speech2S + + + [**Joint Pre-Training with Speech and Bilingual Text for Direct Speech to Speech Translation**](https://arxiv.org/abs/2210.17027) + + +- (Updating) Nov. 2022: release the code and models +- Nov. 2022: release preprint in [arXiv](https://arxiv.org/abs/2210.17027) + +## Pre-Trained and Fine-tuned Models + +| Model | Pre-training Dataset | Fine-tuning Dataset | Model | +| :------: | :----------------------------------------------: | :-----------------: | :-----: | +| Speech2S_enes | Voxpopuli_en_v2 | - | [Google Drive](https://drive.google.com/file/d/1TYypFiEKoCixUro8FTTG23bRZYwAxhkX/view?usp=share_link) | +| Speech2S_enes | Voxpopuli_en_v2 | Voxpopuli_s2s | [Google Drive](https://drive.google.com/file/d/11RxeKznSrHcoP_KK9A1VgwRt3fNh_U_C/view?usp=share_link) | +| Speech2S_esen | Voxpopuli_es_v2 | - | [Google Drive](https://drive.google.com/file/d/1NoC7W-UtQZ-ugIptF1ex0ZlGJncsT1S4/view?usp=share_link) | +| Speech2S_esen | Voxpopuli_es_v2 | Voxpopuli_s2s | [Google Drive](https://drive.google.com/file/d/1eNcKw4ZWGmcABWXJxlf6MKocmiPrKSkH/view?usp=share_link) | + + +## Setup +``` +cd Speech2S/speech2s +pip install --editable fairseq/ +``` + +## Data Preparation +Please follow the steps of data preparation for S2ST in [here](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_speech/docs/enhanced_direct_s2st_discrete_units.md). + +## Pre-Training +``` +cd speech2s/stpretrain_scripts +base_sc2c_enes.sh +``` +## Finetune +``` +cd speech2s/stpretrain_scripts +finetune_enes.sh +``` +## Inference +``` +cd speech2s/stpretrain_scripts +inference_ed.sh +``` +## Results on Voxpopuli and Covst + + +## License + +This project is licensed under the license found in the LICENSE file in the root directory of this source tree. 
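For the released checkpoints listed in the table above, the following is a minimal, hypothetical sketch of loading one of them with fairseq's checkpoint utilities; the local path is a placeholder for a downloaded checkpoint, and the `speech2s` extensions are assumed to be importable (installed as in the Setup step or exposed through fairseq's user-dir mechanism) so the custom task and model classes resolve:

```python
# Hypothetical usage sketch, not part of this repository.
from fairseq import checkpoint_utils

models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
    ["/path/to/downloaded/speech2s_checkpoint.pt"]  # placeholder path
)
model = models[0].eval()  # cfg and task carry the training configuration and dictionaries
```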
+Portions of the source code are based on the [FAIRSEQ](https://github.com/pytorch/fairseq). + +[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct) + +## Reference + +If you find our work is useful in your research, please cite the following paper: +```bibtex +@article{wei2022joint, + title={Joint Pre-Training with Speech and Bilingual Text for Direct Speech to Speech Translation}, + author={Wei, Kun and Zhou, Long and Zhang, Ziqiang and Chen, Liping and Liu, Shujie and He, Lei and Li, Jinyu and Wei, Furu}, + journal={arXiv preprint arXiv:2210.17027}, + year={2022} +} +``` diff --git a/SpeechT5/Speech2S/speech2s/__init__.py b/SpeechT5/Speech2S/speech2s/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..97327d269e93a13cd135f6c1a187fd820a8decb8 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/__init__.py @@ -0,0 +1 @@ +from . import data, tasks, criterions, models diff --git a/SpeechT5/Speech2S/speech2s/config/finetune_asr/speechut_base_100h.yaml b/SpeechT5/Speech2S/speech2s/config/finetune_asr/speechut_base_100h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..736c3c72b9a7ba85eacaf44e1952fa7f0fc15a4f --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/config/finetune_asr/speechut_base_100h.yaml @@ -0,0 +1,101 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 100 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 1 + keep_last_epochs: 1 + keep_best_checkpoints: 5 + best_checkpoint_metric: dec_accuracy + maximize_best_checkpoint_metric: true + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: 1 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: joint_sc2t_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: false # must be consistent with pre-training + labels: ["ltr"] + store_labels: true + single_target: true + add_decoder_target: true + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 1300000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_100 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc_ce + zero_infinity: true + +optimization: + max_update: 40000 + lr: [0.00001] + sentence_avg: true + update_freq: [2] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speechut_asr + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + add_decoder: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/config/finetune_asr/speechut_large_100h.yaml b/SpeechT5/Speech2S/speech2s/config/finetune_asr/speechut_large_100h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cbc59e61f10ab00b997286d6355f22ce1008677 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/config/finetune_asr/speechut_large_100h.yaml @@ -0,0 +1,102 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 100 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 1 + keep_last_epochs: 5 + keep_best_checkpoints: 5 + best_checkpoint_metric: dec_accuracy + maximize_best_checkpoint_metric: true + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: 16 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: joint_sc2t_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: true # must be consistent with pre-training + labels: ["ltr"] + store_labels: true + single_target: true + add_decoder_target: true + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 1300000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_100 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc_ce + zero_infinity: true + +optimization: + max_update: 40000 + lr: [0.00001] + sentence_avg: true + update_freq: [2] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speechut_asr + w2v_path: ??? + apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + add_decoder: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/config/finetune_asr/speechut_large_960h.yaml b/SpeechT5/Speech2S/speech2s/config/finetune_asr/speechut_large_960h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f10d6002555e5cbcfbf31035d8258e77abc26050 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/config/finetune_asr/speechut_large_960h.yaml @@ -0,0 +1,100 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 100 + tensorboard_logdir: tblog + +checkpoint: + save_interval: 1 + keep_last_epochs: 5 + keep_best_checkpoints: 5 + best_checkpoint_metric: dec_accuracy + maximize_best_checkpoint_metric: true + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: 24 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: joint_sc2t_pretraining + data: ??? + fine_tuning: true + label_dir: ??? 
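  # Note: every value left as ??? in these fine-tuning configs (task.data, task.label_dir,
  # model.w2v_path, hydra.run.dir, hydra.sweep.dir) is a mandatory Hydra field and must be
  # supplied at launch time as a dotted override, e.g. task.data=/path/to/manifests
  # model.w2v_path=/path/to/pretrained.pt (the paths here are placeholders).
  # For reference, the tri_stage scheduler splits max_update=40000 by phase_ratio
  # [0.1, 0.4, 0.5] into 4000 warmup, 16000 hold and 20000 decay updates.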
+ normalize: true # must be consistent with pre-training + labels: ["ltr"] + store_labels: true + single_target: true + add_decoder_target: true + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 1300000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_960 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc_ce + zero_infinity: true + +optimization: + max_update: 40000 + lr: [0.00001] + sentence_avg: true + update_freq: [2] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speechut_asr + w2v_path: ??? + apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.0 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + add_decoder: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/config/pretrain/speechut_base_librispeech.yaml b/SpeechT5/Speech2S/speech2s/config/pretrain/speechut_base_librispeech.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a3751febf2efc3cbf7a91e3a75f05b570559f2c --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/config/pretrain/speechut_base_librispeech.yaml @@ -0,0 +1,153 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 4 + keep_last_epochs: 4 + save_interval_updates: 50000 + keep_interval_updates: -1 + keep_interval_updates_pattern: 50000 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_port: -1 + distributed_world_size: 32 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + store_labels: true + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: false # must be consistent with extractor + add_decoder_target: true + text_cfg: + seed: ${common.seed} + text_data: ??? + data_config: config.yaml + sample_break_mode: eos + tokens_per_sample: 1024 + shorten_method: "random_crop" + text_maxtokens_ratio: 1.5 + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 1 + +criterion: + _name: speechut_criterion + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + label_smoothing: 0.1 + u2t_ed_weight: 0.1 + u2t_ctc_weight: 0.1 + text_mum_weight: 0.5 + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: speechut + label_rate: ??? 
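  # label_rate must match the frame rate of the discrete unit labels: the
  # conv_feature_layers spec below has strides 5,2,2,2,2,2,2, i.e. a total
  # downsampling of 320 samples per frame, which is 50 frames per second at a
  # 16 kHz sample_rate (so 50 Hz HuBERT-style units are assumed here).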
+ skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + activation_fn: "gelu" + encoder_layers: 6 + encoder_attention_heads: 8 + encoder_layerdrop: 0.0 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + add_unit_encoder: true + add_text_ctc: true + mask_u2t: false + mix_with_unit: true + add_decoder: true + reset_decoder_embedding_config: true + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + max_source_positions: 3000 + max_target_positions: 3000 + no_scale_embedding: true + layernorm_embedding: true + no_token_positional_embeddings: false + share_decoder_input_output_embed: false + encoder: + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 8 + normalize_before: false + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + decoder: + layerdrop: 0.1 + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 12 + normalize_before: false + learned_pos: false + output_dim: 768 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/config/pretrain/speechut_large_librilight.yaml b/SpeechT5/Speech2S/speech2s/config/pretrain/speechut_large_librilight.yaml new file mode 100644 index 0000000000000000000000000000000000000000..849c1d986126f6e26f3e10feb14fae0a299be4b4 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/config/pretrain/speechut_large_librilight.yaml @@ -0,0 +1,159 @@ +# @package _group_ + +common: + fp16: true + fp16_scale_tolerance: 0.1 # alleviate fp16 overflow issue + log_format: json + log_interval: 200 + seed: 1234 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 1 + keep_last_epochs: 4 + save_interval_updates: 10000 + keep_interval_updates: -1 + keep_interval_updates_pattern: 10000 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_port: -1 + distributed_world_size: 128 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + store_labels: true + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: true # must be consistent with extractor + add_decoder_target: true + text_cfg: + seed: ${common.seed} + text_data: ??? 
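    # text_data (mandatory) is the location of the prepared text corpus for the
    # text branch of joint pre-training; data_config, tokens_per_sample,
    # sample_break_mode and shorten_method below mirror fairseq's usual
    # text-dataset options.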
+ data_config: config.yaml + sample_break_mode: eos + tokens_per_sample: 1024 + shorten_method: "random_crop" + text_maxtokens_ratio: 1.4 + +dataset: + num_workers: 6 + max_tokens: 900000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 2 + +criterion: + _name: speechut_criterion + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + label_smoothing: 0.1 + u2t_ed_weight: 0.1 + u2t_ctc_weight: 0.1 + text_mum_weight: 0.5 + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 1.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + end_learning_rate: 0.00015 # for future longger pre-training, e.g. 600K step + +model: + _name: speechut + label_rate: ??? + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: layer_norm + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 768 + activation_fn: "gelu" + encoder_layers: 12 + encoder_attention_heads: 16 + encoder_layerdrop: 0.0 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + layer_norm_first: true + feature_grad_mult: 1.0 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + add_unit_encoder: true + add_text_ctc: true + mask_u2t: false + mix_with_unit: true + add_decoder: true + reset_decoder_embedding_config: true + scaling_for_att: 32 # alleviate fp16 overflow issue + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + max_source_positions: 3000 + max_target_positions: 3000 + no_scale_embedding: true + layernorm_embedding: true + no_token_positional_embeddings: true + share_decoder_input_output_embed: false + encoder: + embed_dim: 1024 + ffn_embed_dim: 4096 + layers: 12 + attention_heads: 16 + normalize_before: false + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + decoder: + layerdrop: 0.1 + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 12 + normalize_before: false + learned_pos: false + output_dim: 768 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/criterions/__init__.py b/SpeechT5/Speech2S/speech2s/criterions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2bf9fac9a8c00d76decd07417d86a2625c4c851c --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/criterions/__init__.py @@ -0,0 +1,9 @@ +import importlib +import os + +for file in os.listdir(os.path.dirname(__file__)): + if file.endswith(".py") and not file.startswith("_"): + criterion_name = file[: file.find(".py")] + importlib.import_module( + "speechut.criterions." 
+ criterion_name + ) diff --git a/SpeechT5/Speech2S/speech2s/criterions/ctc_ce.py b/SpeechT5/Speech2S/speech2s/criterions/ctc_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..aab6c9d23ac3b7dc410704bcba8982a697a57656 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/criterions/ctc_ce.py @@ -0,0 +1,414 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import math +from argparse import Namespace +from dataclasses import dataclass, field +from omegaconf import II +from typing import Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.dataclass import FairseqDataclass +from fairseq.data.data_utils import post_process +from fairseq.tasks import FairseqTask +from fairseq.logging.meters import safe_round + + +@dataclass +class CtcCeCriterionConfig(FairseqDataclass): + zero_infinity: bool = field( + default=False, + metadata={"help": "zero inf loss when source length <= target length"}, + ) + sentence_avg: bool = II("optimization.sentence_avg") + post_process: str = field( + default="letter", + metadata={ + "help": "how to post process predictions into words. can be letter, " + "wordpiece, BPE symbols, etc. 
" + "See fairseq.data.data_utils.post_process() for full list of options" + }, + ) + wer_kenlm_model: Optional[str] = field( + default=None, + metadata={ + "help": "if this is provided, use kenlm to compute wer (along with other wer_* args)" + }, + ) + wer_lexicon: Optional[str] = field( + default=None, + metadata={"help": "lexicon to use with wer_kenlm_model"}, + ) + wer_lm_weight: float = field( + default=2.0, + metadata={"help": "lm weight to use with wer_kenlm_model"}, + ) + wer_word_score: float = field( + default=-1.0, + metadata={"help": "lm word score to use with wer_kenlm_model"}, + ) + + wer_args: Optional[str] = field( + default=None, + metadata={ + "help": "DEPRECATED: tuple of (wer_kenlm_model, wer_lexicon, wer_lm_weight, wer_word_score)" + }, + ) + + dec_weight: float = field( + default=0.5, + metadata={"help": "weights for decoder CE Loss, loss will be ((1 - dec_weight) * hubert_loss + dec_weight * CE_Loss)"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + label_smoothing: float = field( + default=0.1, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + + +@register_criterion("ctc_ce", dataclass=CtcCeCriterionConfig) +class CtcCeCriterion(FairseqCriterion): + def __init__(self, cfg: CtcCeCriterionConfig, task: FairseqTask): + super().__init__(task) + self.blank_idx = ( + task.target_dictionary.index(task.blank_symbol) + if hasattr(task, "blank_symbol") + else 0 + ) + self.pad_idx = task.target_dictionary.pad() + self.eos_idx = task.target_dictionary.eos() + self.post_process = cfg.post_process + + if cfg.wer_args is not None: + ( + cfg.wer_kenlm_model, + cfg.wer_lexicon, + cfg.wer_lm_weight, + cfg.wer_word_score, + ) = eval(cfg.wer_args) + + if cfg.wer_kenlm_model is not None: + from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder + + dec_args = Namespace() + dec_args.nbest = 1 + dec_args.criterion = "ctc" + dec_args.kenlm_model = cfg.wer_kenlm_model + dec_args.lexicon = cfg.wer_lexicon + dec_args.beam = 50 + dec_args.beam_size_token = min(50, len(task.target_dictionary)) + dec_args.beam_threshold = min(50, len(task.target_dictionary)) + dec_args.lm_weight = cfg.wer_lm_weight + dec_args.word_score = cfg.wer_word_score + dec_args.unk_weight = -math.inf + dec_args.sil_weight = 0 + + self.w2l_decoder = W2lKenLMDecoder(dec_args, task.target_dictionary) + else: + self.w2l_decoder = None + + self.zero_infinity = cfg.zero_infinity + self.sentence_avg = cfg.sentence_avg + + self.dec_weight = cfg.dec_weight + self.report_accuracy = cfg.report_accuracy + self.ignore_prefix_size = cfg.ignore_prefix_size + self.eps = cfg.label_smoothing + + def forward(self, model, sample, reduce=True): + net_output = model(**sample["net_input"]) + lprobs = model.get_normalized_probs( + net_output, log_probs=True + ).contiguous() # (T, B, C) from the encoder + + if "src_lengths" in sample["net_input"]: + input_lengths = sample["net_input"]["src_lengths"] + else: + if net_output["padding_mask"] is not None: + non_padding_mask = ~net_output["padding_mask"] + input_lengths = non_padding_mask.long().sum(-1) + else: + input_lengths = lprobs.new_full( + (lprobs.size(1),), lprobs.size(0), dtype=torch.long + ) + + pad_mask = (sample["target"] != self.pad_idx) & ( + sample["target"] != self.eos_idx + ) + targets_flat = sample["target"].masked_select(pad_mask) + if "target_lengths" in sample: + 
target_lengths = sample["target_lengths"] + else: + target_lengths = pad_mask.sum(-1) + + with torch.backends.cudnn.flags(enabled=False): + loss = F.ctc_loss( + lprobs, + targets_flat, + input_lengths, + target_lengths, + blank=self.blank_idx, + reduction="sum", + zero_infinity=self.zero_infinity, + ) + + ntokens = ( + sample["ntokens"] if "ntokens" in sample else target_lengths.sum().item() + ) + + sample_size = sample["target"].size(0) if self.sentence_avg else ntokens + + logging_output = {} + if "decoder_target" in sample: + if net_output["decoder_out"] is not None: + dec_sample_size = sample["target"].size(0) if self.sentence_avg else sample["dec_ntokens"] + dec_loss, dec_nll_loss = self.compute_ce_loss(model, net_output["decoder_out"], sample, reduce=reduce) + logging_output["ctc_loss"] = loss.item() + loss = (1 - self.dec_weight) * loss + (self.dec_weight * dec_loss * sample_size / dec_sample_size) + logging_output["dec_loss"] = dec_loss.item() + logging_output["dec_nll_loss"] = dec_nll_loss.item() + logging_output["dec_sample_size"] = dec_sample_size + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output["decoder_out"], sample) + logging_output["dec_n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + else: + logging_output["ctc_loss"] = loss.item() + loss = (1 - self.dec_weight) * loss + logging_output["dec_loss"] = 0 + logging_output["dec_nll_loss"] = 0 + logging_output["dec_sample_size"] = 1 + if self.report_accuracy: + logging_output["dec_n_correct"] = 0 + logging_output["total"] = 1 + + logging_output = { + "loss": utils.item(loss.data), # * sample['ntokens'], + "ntokens": ntokens, + "nsentences": sample["id"].numel(), + "sample_size": sample_size, + **logging_output, + } + + if not model.training and self.dec_weight < 1.0: + import editdistance + + with torch.no_grad(): + lprobs_t = lprobs.transpose(0, 1).float().contiguous().cpu() + + c_err = 0 + c_len = 0 + w_errs = 0 + w_len = 0 + wv_errs = 0 + for lp, t, inp_l in zip( + lprobs_t, + sample["target_label"] + if "target_label" in sample + else sample["target"], + input_lengths, + ): + lp = lp[:inp_l].unsqueeze(0) + + decoded = None + if self.w2l_decoder is not None: + decoded = self.w2l_decoder.decode(lp) + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + + p = (t != self.task.target_dictionary.pad()) & ( + t != self.task.target_dictionary.eos() + ) + targ = t[p] + targ_units = self.task.target_dictionary.string(targ) + targ_units_arr = targ.tolist() + + toks = lp.argmax(dim=-1).unique_consecutive() + pred_units_arr = toks[toks != self.blank_idx].tolist() + + c_err += editdistance.eval(pred_units_arr, targ_units_arr) + c_len += len(targ_units_arr) + + targ_words = post_process(targ_units, self.post_process).split() + + pred_units = self.task.target_dictionary.string(pred_units_arr) + pred_words_raw = post_process(pred_units, self.post_process).split() + + if decoded is not None and "words" in decoded: + pred_words = decoded["words"] + w_errs += editdistance.eval(pred_words, targ_words) + wv_errs += editdistance.eval(pred_words_raw, targ_words) + else: + dist = editdistance.eval(pred_words_raw, targ_words) + w_errs += dist + wv_errs += dist + + w_len += len(targ_words) + + logging_output["wv_errors"] = wv_errs + logging_output["w_errors"] = w_errs + logging_output["w_total"] = w_len + logging_output["c_errors"] = c_err + logging_output["c_total"] = c_len + + 
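        # Recap of the objective assembled above: with w = dec_weight,
        #   loss = (1 - w) * ctc_loss + w * dec_loss * sample_size / dec_sample_size,
        # i.e. the decoder cross-entropy is rescaled to the CTC sample-size
        # normalization before mixing. The character/word error counters gathered
        # in this block are only computed at validation time and feed the
        # uer/wer metrics in reduce_metrics.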
return loss, sample_size, logging_output + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.pad_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.pad_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = sample["decoder_target"] + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs)) + nsentences = utils.item( + sum(log.get("nsentences", 0) for log in logging_outputs) + ) + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar("ntokens", ntokens) + metrics.log_scalar("nsentences", nsentences) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + + c_errors = sum(log.get("c_errors", 0) for log in logging_outputs) + metrics.log_scalar("_c_errors", c_errors) + c_total = sum(log.get("c_total", 0) for log in logging_outputs) + metrics.log_scalar("_c_total", c_total) + w_errors = sum(log.get("w_errors", 0) for log in logging_outputs) + metrics.log_scalar("_w_errors", w_errors) + wv_errors = sum(log.get("wv_errors", 0) for log in logging_outputs) + metrics.log_scalar("_wv_errors", wv_errors) + w_total = sum(log.get("w_total", 0) for log in logging_outputs) + metrics.log_scalar("_w_total", w_total) + + if c_total > 0: + metrics.log_derived( + "uer", + lambda meters: safe_round( + meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3 + ) + if meters["_c_total"].sum > 0 + else float("nan"), + ) + if w_total > 0: + metrics.log_derived( + "wer", + lambda meters: safe_round( + meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + metrics.log_derived( + "raw_wer", + lambda meters: safe_round( + meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + + if "dec_loss" in logging_outputs[0]: + ctc_loss_sum = sum(log.get("ctc_loss", 0) for log in logging_outputs) + dec_loss_sum = sum(log.get("dec_loss", 0) for log in logging_outputs) + dec_nll_loss_sum = sum(log.get("dec_nll_loss", 0) for log in logging_outputs) + dec_sample_size = sum(log.get("dec_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "dec_loss", dec_loss_sum / dec_sample_size / 
math.log(2), dec_sample_size, round=3 + ) + metrics.log_scalar( + "ctc_loss", ctc_loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar( + "dec_nll_loss", dec_nll_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_derived( + "dec_ppl", lambda meters: utils.get_perplexity(meters["dec_nll_loss"].avg) + ) + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("dec_n_correct", n_correct) + metrics.log_derived( + "dec_accuracy", + lambda meters: round( + meters["dec_n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/SpeechT5/Speech2S/speech2s/criterions/speechut_criterion.py b/SpeechT5/Speech2S/speech2s/criterions/speechut_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..0d735f1efd16aebf4146e26d5a5ebaeca2516ad7 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/criterions/speechut_criterion.py @@ -0,0 +1,384 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import math +import re +from dataclasses import dataclass, field +from typing import List, Optional + +import numpy as np +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.dataclass import FairseqDataclass + +logger = logging.getLogger(__name__) + +@dataclass +class SpeechUTCriterionConfig(FairseqDataclass): + pred_masked_weight: float = field( + default=1.0, + metadata={"help": "weight for predictive loss for masked frames"}, + ) + pred_nomask_weight: float = field( + default=0.0, + metadata={"help": "weight for predictive loss for unmasked frames"}, + ) + loss_weights: Optional[List[float]] = field( + default=None, + metadata={"help": "weights for additional loss terms (not first one)"}, + ) + log_keys: List[str] = field( + default_factory=lambda: [], + metadata={"help": "output keys to log"}, + ) + u2t_ed_weight: float = field( + default=0.1, + metadata={"help": "weights for text ED Loss, loss will be (hubert_loss + text_mum_weight * MUM_Loss + u2t_ed_weight * CE_Loss + u2t_ctc_weight * CTC_loss)"}, + ) + u2t_ctc_weight: float = field( + default=0.0, + metadata={"help": "weights for text ED Loss, loss will be (hubert_loss + text_mum_weight * MUM_Loss + u2t_ed_weight * CE_Loss + u2t_ctc_weight * CTC_loss)"}, + ) + text_mum_weight: float = field( + default=0.0, + metadata={"help": 
"masked unit modeling weight from the text end"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + label_smoothing: float = field( + default=0.0, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + no_ctc_blank: bool = field( + default=False, + metadata={"help": "mask out the blank of ctc, only when dec_loss_type=ctc"}, + ) + label_smoothing: float = field( + default=0.0, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + +@register_criterion("speechut_criterion", dataclass=SpeechUTCriterionConfig) +class SpeechUTCriterion(FairseqCriterion): + def __init__( + self, + task, + pred_masked_weight, + pred_nomask_weight, + loss_weights=None, + log_keys=None, + u2t_ed_weight=0.1, + u2t_ctc_weight=0, + text_mum_weight=0, + report_accuracy=False, + ignore_prefix_size=0, + label_smoothing=0, + no_ctc_blank=False, + ): + super().__init__(task) + self.pred_masked_weight = pred_masked_weight + self.pred_nomask_weight = pred_nomask_weight + self.loss_weights = loss_weights + self.log_keys = [] if log_keys is None else log_keys + self.u2t_ed_weight = u2t_ed_weight + self.u2t_ctc_weight = u2t_ctc_weight + self.text_mum_weight = text_mum_weight + self.report_accuracy = report_accuracy + self.ignore_prefix_size = ignore_prefix_size + self.eps = label_smoothing + self.no_ctc_blank = no_ctc_blank + self.padding_idx = task.dictionaries[0].pad() + self.eos_idx = task.dictionaries[0].eos() + self.blank_idx = task.dictionaries[0].bos() + + def compute_hubert_loss(self, model, net_output, reduction, preffix='', suffix=''): + loss = 0 + sample_size = [] + logging_output = {} + loss_m_list = [] + logp_m_list = model.get_logits(net_output, True) + targ_m_list = model.get_targets(net_output, True) + assert self.pred_masked_weight == 0 or len(logp_m_list) > 0 + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"{preffix}loss_m_{i}"] = loss_m.detach().item() + if self.pred_masked_weight > 0: + loss += self.pred_masked_weight * sum(loss_m_list) + sample_size.append(targ_m_list[0].numel()) + + loss_u_list = [] + logp_u_list = model.get_logits(net_output, False) + targ_u_list = model.get_targets(net_output, False) + assert self.pred_nomask_weight == 0 or len(logp_u_list) > 0 + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"{preffix}loss_u_{i}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss += self.pred_nomask_weight * sum(loss_u_list) + sample_size.append(targ_u_list[0].numel()) + + sample_size = np.mean(sample_size) + + def compute_correct(logits, targets): + if logits.numel() == 0: + return 0, 0 + else: + assert logits.dim() > 1, logits.shape + max = logits.argmax(-1) == targets + min = logits.argmin(-1) == targets + both = max & min + corr = max.long().sum().item() - both.long().sum().item() + count = max.numel() + return corr, count + + with torch.no_grad(): + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + corr_m, count_m = compute_correct(logp_m, targ_m) + logging_output[f"correct_m_{i}{suffix}"] = corr_m + logging_output[f"count_m_{i}{suffix}"] = count_m + + for i, (logp_u, targ_u) in 
enumerate(zip(logp_u_list, targ_u_list)): + corr_u, count_u = compute_correct(logp_u, targ_u) + logging_output[f"correct_u_{i}{suffix}"] = corr_u + logging_output[f"count_u_{i}{suffix}"] = count_u + + return loss, sample_size, logging_output + + + def forward(self, model, sample, reduce=True, log_pred=False): + """Compute the loss for the given sample. + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + reduction = "sum" if reduce else "none" + + if "net_input" in sample: + unit_sample = text_sample = None + else: + unit_sample = sample.get("text_mono", None) + text_sample = sample.get("text_paired", None) + assert unit_sample is not None or text_sample is not None + sample = sample.get("speech") + + ### 1. S2U: do hubert forward and loss computation + sample["modality"] = "speech" + net_output = model(target_list=sample["target_list"], **sample["net_input"]) + loss, sample_size, logging_output = self.compute_hubert_loss( + model, + net_output, + reduction, + ) + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len( + self.loss_weights + ), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss += p + logging_output[f"loss_{n}"] = p.item() + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + ### 2. do text U2T forward and loss computation + if text_sample is not None and (self.u2t_ctc_weight + self.u2t_ed_weight) > 0: + ## 2.1 re-loading "target_list", in default case, target_list = [src_tokens], + ## while in case of using "unit-phone-char" structure, target_list will be [ref_tokens] + text_sample["net_input"]["target_list"] = [ + text_sample.get("ref_tokens", text_sample["net_input"]["src_tokens"].clone()), + ] + text_net_output = model(**text_sample["net_input"]) + text_sample_size = text_sample["ntokens"] + + ### 2.1 U2T_UCTC + if self.u2t_ctc_weight > 0: + text_ctc_loss = self.compute_ctc_loss(model, text_net_output, text_sample["target"], reduction=reduction) + loss += self.u2t_ctc_weight * text_ctc_loss * sample_size / text_sample_size + logging_output["text_ctc_loss"] = utils.item(text_ctc_loss) + logging_output["text_sample_size"] = text_sample_size + + ### 2.2 U2T_ED + if self.u2t_ed_weight > 0: + text_dec_loss, text_dec_nll_loss = self.compute_ce_loss(model, text_net_output["decoder_out"], text_sample, reduce=reduce) + loss += self.u2t_ed_weight * text_dec_loss * sample_size / text_sample_size + logging_output["text_dec_loss"] = utils.item(text_dec_loss) + logging_output["text_dec_nll_loss"] = utils.item(text_dec_nll_loss) + logging_output["text_sample_size"] = text_sample_size + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, text_net_output["decoder_out"], text_sample) + logging_output["correct_text_dec"] = utils.item(n_correct.data) + logging_output["count_text_dec"] = utils.item(total.data) + + ### 3. 
do unit MUM forward and loss computation + if unit_sample is not None and self.text_mum_weight > 0: + src_tokens = unit_sample["net_input"]["src_tokens"] + target = unit_sample.get("target", None) + target = src_tokens.clone() if target is None else target + unit_net_output = model.forward_mum(src_tokens, target) + loss_num, sample_size_mum, logging_output_mum = self.compute_hubert_loss( + model, + unit_net_output, + reduction, + preffix="mum_", + suffix="_mum", + ) + loss += self.text_mum_weight * loss_num * sample_size / sample_size_mum + logging_output["unit_sample_size"] = sample_size_mum + logging_output.update(logging_output_mum) + + logging_output = { + "loss": utils.item(loss) if reduce else loss, + "ntokens": sample_size, + "nsentences": sample["id"].numel() + (text_sample["id"].numel() if text_sample is not None else 0), + "sample_size": sample_size, + **logging_output, + } + + return loss, sample_size, logging_output + + def compute_ctc_loss(self, model, net_output, target, reduction): + logits = net_output["encoder_out_ctc"][0] # (T, B, C) from the code-encoder + if self.no_ctc_blank: + ## set prob of to -inf + logits = logits.float() + logits[:, :, self.blank_idx] = -1000000.0 + + lprobs = F.log_softmax(logits.float(), dim=-1) + + encoder_padding_mask = net_output["encoder_padding_mask"][0] + non_padding_mask = ~encoder_padding_mask + input_lengths = non_padding_mask.long().sum(-1) + pad_mask = (target != self.padding_idx) & (target != self.eos_idx) + targets_flat = target.masked_select(pad_mask) + target_lengths = pad_mask.sum(-1) + + with torch.backends.cudnn.flags(enabled=False): + loss = F.ctc_loss( + lprobs, + targets_flat, + input_lengths, + target_lengths, + blank=self.blank_idx, + reduction=reduction, + zero_infinity=True, + ) + return loss + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.padding_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.padding_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = sample["target"] + + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training (copied from normal cross entropy).""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg) + ) + else: + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg) + ) + + counts = {} + for lk in logging_outputs[0].keys(): + if lk.startswith("count_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + 
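                # count_* totals are logged and cached so that the matching correct_*
                # sums below can be converted into accuracies (correct_x / count_x).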
metrics.log_scalar(lk, val) + counts[lk] = val + + for lk in logging_outputs[0].keys(): + if lk.startswith("loss_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / sample_size / math.log(2), round=3) + elif lk.startswith("correct_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / counts[re.sub("correct", "count", lk)]) + + if "text_sample_size" in logging_outputs[0]: + text_sample_size = sum(log.get("text_sample_size", 0) for log in logging_outputs) + for lk in logging_outputs[0].keys(): + if lk.startswith("text_") and lk.endswith("_loss"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / text_sample_size / math.log(2), round=3) + + if "unit_sample_size" in logging_outputs[0]: + unit_sample_size = sum(log.get("unit_sample_size", 0) for log in logging_outputs) + for lk in logging_outputs[0].keys(): + if lk.startswith("mum_loss_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / unit_sample_size / math.log(2), round=3) + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + raise NotImplementedError() + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return False diff --git a/SpeechT5/Speech2S/speech2s/data/concat_dataset.py b/SpeechT5/Speech2S/speech2s/data/concat_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..5766921ac39b571010b318e0d4b6f967cd21d96e --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/data/concat_dataset.py @@ -0,0 +1,129 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import bisect + +import numpy as np +from torch.utils.data.dataloader import default_collate + +from fairseq.data import FairseqDataset + + +class ConcatDataset(FairseqDataset): + @staticmethod + def cumsum(sequence, sample_ratios): + r, s = [], 0 + for e, ratio in zip(sequence, sample_ratios): + curr_len = int(ratio * len(e)) + r.append(curr_len + s) + s += curr_len + return r + + def __init__(self, datasets, sample_ratios=1): + super(ConcatDataset, self).__init__() + assert len(datasets) > 0, "datasets should not be an empty iterable" + self.datasets = list(datasets) + if isinstance(sample_ratios, int): + sample_ratios = [sample_ratios] * len(self.datasets) + self.sample_ratios = sample_ratios + self.cumulative_sizes = self.cumsum(self.datasets, sample_ratios) + self.real_sizes = [len(d) for d in self.datasets] + + def __len__(self): + return self.cumulative_sizes[-1] + + def __getitem__(self, idx): + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + return self.datasets[dataset_idx][sample_idx] + + def _get_dataset_and_sample_index(self, idx: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + sample_idx = sample_idx % self.real_sizes[dataset_idx] + return dataset_idx, sample_idx + + def collater(self, samples, **extra_args): + # For now only supports 
datasets with same underlying collater implementations + if hasattr(self.datasets[0], "collater"): + return self.datasets[0].collater(samples, **extra_args) + else: + return default_collate(samples, **extra_args) + + def size(self, idx: int): + """ + Return an example's size as a float or tuple. + """ + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + return self.datasets[dataset_idx].size(sample_idx) + + def num_tokens(self, index: int): + return np.max(self.size(index)) + + def attr(self, attr: str, index: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, index) + return getattr(self.datasets[dataset_idx], attr, None) + + @property + def sizes(self): + _dataset_sizes = [] + for ds, sr in zip(self.datasets, self.sample_ratios): + if isinstance(ds.sizes, np.ndarray): + _dataset_sizes.append(np.tile(ds.sizes, sr)) + else: + # Only support underlying dataset with single size array. + assert isinstance(ds.sizes, list) + _dataset_sizes.append(np.tile(ds.sizes[0], sr)) + return np.concatenate(_dataset_sizes) + + @property + def supports_prefetch(self): + return all(d.supports_prefetch for d in self.datasets) + + def ordered_indices(self): + """ + Returns indices sorted by length. So less padding is needed. + """ + if isinstance(self.sizes, np.ndarray) and len(self.sizes.shape) > 1: + # special handling for concatenating lang_pair_datasets + if getattr(self.datasets[0], "shuffle", False): + indices = np.random.permutation(len(self)).astype(np.int64) + else: + indices = np.arange(len(self), dtype=np.int64) + sizes = self.sizes + tgt_sizes = ( + sizes[:, 1] if len(sizes.shape) > 0 and sizes.shape[1] > 1 else None + ) + src_sizes = ( + sizes[:, 0] if len(sizes.shape) > 0 and sizes.shape[1] > 1 else sizes + ) + # sort by target length, then source length + if tgt_sizes is not None: + indices = indices[np.argsort(tgt_sizes[indices], kind="mergesort")] + return indices[np.argsort(src_sizes[indices], kind="mergesort")] + else: + return np.argsort(self.sizes) + + def prefetch(self, indices): + frm = 0 + for to, ds in zip(self.cumulative_sizes, self.datasets): + real_size = len(ds) + if getattr(ds, "supports_prefetch", False): + ds.prefetch([(i - frm) % real_size for i in indices if frm <= i < to]) + frm = to + + @property + def can_reuse_epoch_itr_across_epochs(self): + return all(d.can_reuse_epoch_itr_across_epochs for d in self.datasets) + + def set_epoch(self, epoch): + super().set_epoch(epoch) + for ds in self.datasets: + if hasattr(ds, "set_epoch"): + ds.set_epoch(epoch) diff --git a/SpeechT5/Speech2S/speech2s/data/hubert_dataset.py b/SpeechT5/Speech2S/speech2s/data/hubert_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..64965dea445a0a5afc63c887b1bc89cece0b203b --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/data/hubert_dataset.py @@ -0,0 +1,597 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import itertools +import logging +import io +import os +import sys +import time +from pathlib import Path +from typing import Any, List, Optional, Union, Tuple + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils, Dictionary +from fairseq.data.fairseq_dataset import FairseqDataset +from fairseq.data.audio.audio_utils import ( + read_from_stored_zip, 
+ is_sf_audio_data, +) + +FEATURE_OR_SF_AUDIO_FILE_EXTENSIONS = {".npy", ".wav", ".flac", ".ogg"} + +logger = logging.getLogger(__name__) + +def parse_path(path: str) -> Tuple[str, List[int]]: + """Parse data path which is either a path to + 1. a .npy/.wav/.flac/.ogg file + 2. a stored ZIP file with slicing info: "[zip_path]:[offset]:[length]" + + Args: + path (str): the data path to parse + + Returns: + file_path (str): the file path + slice_ptr (list of int): empty in case 1; + byte offset and length for the slice in case 2 + """ + + if Path(path).suffix in FEATURE_OR_SF_AUDIO_FILE_EXTENSIONS: + _path, slice_ptr = path, [] + else: + _path, *slice_ptr = path.split(":") + if not Path(_path).is_file(): + raise FileNotFoundError(f"File not found: {_path}") + assert len(slice_ptr) in {0, 1, 2}, f"Invalid path: {path}" + slice_ptr = [int(i) for i in slice_ptr] + return _path, slice_ptr + +def load_audio(manifest_path, max_keep, min_keep, retry_times=5): + n_long, n_short = 0, 0 + names, inds, sizes, chunk_names, chunk_indices = [], [], [], [], [] + for i in range(retry_times): + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + assert len(items) == 2, line + sz = int(items[1]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + fname = items[0].split(":") + if len(fname) > 2: + if len(chunk_names) == 0 or fname[0] != chunk_names[-1]: + chunk_names.append(fname[0]) + chunk_indices.append(len(names)) + names.append(items[0]) + inds.append(ind) + sizes.append(sz) + if len(names) == 0: + logger.warn(f"Fail to load manifest for the {i} time") + time.sleep(1) + continue + else: + break + tot = ind + 1 + logger.info( + ( + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, names, inds, tot, sizes, chunk_names, chunk_indices + + +def load_label(label_path, inds, tot, retry_times=5): + for i in range(retry_times): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + if len(labels) == 0: + logger.warn(f"Fail to load label for the {i} time") + time.sleep(1) + continue + else: + break + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds, tot, retry_times=5): + for i in range(retry_times): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + if len(code_lengths) == 0: + logger.warn(f"Fail to load label for the {i} time") + time.sleep(1) + continue + else: + break + assert ( + len(code_lengths) == tot + ), f"number of labels does not match ({len(code_lengths)} != {tot})" + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + +def verify_label_lengths( + audio_sizes, + audio_rate, + label_path, + label_rate, + inds, + tot, + tol=0.1, # tolerance in seconds +): + if label_rate < 0: + logger.info(f"{label_path} is sequence label. 
skipped") + return + + with open(label_path) as f: + lengths = [len(line.rstrip().split()) for line in f] + assert len(lengths) == tot + lengths = [lengths[i] for i in inds] + num_invalid = 0 + for i, ind in enumerate(inds): + dur_from_audio = audio_sizes[i] / audio_rate + dur_from_label = lengths[i] / label_rate + if abs(dur_from_audio - dur_from_label) > tol: + logger.warning( + ( + f"audio and label duration differ too much " + f"(|{dur_from_audio} - {dur_from_label}| > {tol}) " + f"in line {ind+1} of {label_path}. Check if `label_rate` " + f"is correctly set (currently {label_rate}). " + f"num. of samples = {audio_sizes[i]}; " + f"label length = {lengths[i]}" + ) + ) + num_invalid += 1 + if num_invalid > 0: + logger.warning( + f"total {num_invalid} (audio, label) pairs with mismatched lengths" + ) + + +class HubertDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + random_crop: bool = False, + single_target: bool = False, + tgt_dict: Optional[Dictionary] = None, + add_decoder_target: bool = False, + fine_tuning: bool = False, + tgt_lang_idx: int = None, + tokenizer = None, + mbart_style_lang_id: bool = False, + retry_times: int = 5, + reduce_label_for_dec: bool = True, + ): + self.audio_root, self.audio_names, inds, tot, self.wav_sizes, self.chunk_names, self.chunk_indices = load_audio( + manifest_path, max_keep_sample_size, min_keep_sample_size, retry_times + ) + self.sample_rate = sample_rate + self.shuffle = shuffle + self.random_crop = random_crop + self.tgt_dict = tgt_dict + self.add_decoder_target = add_decoder_target + self.fine_tuning = fine_tuning + + self.num_labels = len(label_paths) + self.pad_list = pad_list + self.eos_list = eos_list + self.label_processors = label_processors + self.single_target = single_target + self.epoch = 0 + + self.label_rates = ( + [label_rates for _ in range(len(label_paths))] + if isinstance(label_rates, int) + else label_rates + ) + self.store_labels = store_labels + if store_labels: + self.label_list = [load_label(p, inds, tot, retry_times) for p in label_paths] + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds, tot, retry_times) for p in label_paths + ] + assert label_processors is None or len(label_processors) == self.num_labels + for label_path, label_rate in zip(label_paths, self.label_rates): + verify_label_lengths( + self.wav_sizes, sample_rate, label_path, label_rate, inds, tot + ) + + self.max_sample_size = ( + max_sample_size if max_sample_size is not None else sys.maxsize + ) + self.pad_audio = pad_audio + self.normalize = normalize + self.tgt_lang_idx = tgt_lang_idx + self.tokenizer = tokenizer + self.mbart_style_lang_id = mbart_style_lang_id + self.retry_times = retry_times + self.reduce_label_for_dec = reduce_label_for_dec + logger.info( + f"pad_audio={pad_audio}, random_crop={random_crop}, tgt_lang_idx={self.tgt_lang_idx}, reduce_label_for_dec={reduce_label_for_dec}, " + f"mbart_style_lang_id={mbart_style_lang_id}, normalize={normalize}, max_sample_size={self.max_sample_size}" + ) + + def set_epoch(self, epoch): + 
self.epoch = epoch + + def batch_by_size(self, indices, max_tokens=None, max_sentences=None, required_batch_size_multiple=1): + self.max_tokens = max_tokens + self.max_sentences = max_sentences + self.required_batch_size_multiple = required_batch_size_multiple + if isinstance(indices[0], np.ndarray): + batch_list = [] + for indice in indices: + batch = super(HubertDataset, self).batch_by_size(indice, max_tokens, max_sentences, required_batch_size_multiple) + batch_list.append(batch) + return batch_list + else: + return super(HubertDataset, self).batch_by_size(indices, max_tokens, max_sentences, required_batch_size_multiple) + def shuffle_batches(self, batches, seed): + if isinstance(batches[0], list): + new_batches = [] + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + for batch in batches: + np.random.shuffle(batch) + new_batches.extend(batch) + return new_batches + else: + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + return batches + + def get_audio(self, index): + import soundfile as sf + + wav_path = os.path.join(self.audio_root, self.audio_names[index]) + _path, slice_ptr = parse_path(wav_path) + if len(slice_ptr) == 1: + import kaldiio + feat = kaldiio.load_mat(wav_path) + feat = torch.from_numpy(feat).float() + if self.normalize: + with torch.no_grad(): + feat = F.layer_norm(feat, feat.shape[-1]) + return feat + else: + if len(slice_ptr) == 2: + byte_data = read_from_stored_zip(_path, slice_ptr[0], slice_ptr[1]) + assert is_sf_audio_data(byte_data) + wav_path = io.BytesIO(byte_data) + for i in range(self.retry_times): + if i < self.retry_times - 1: + try: + wav, cur_sample_rate = sf.read(wav_path) + break + except Exception as e: + logger.warn(f"Fail to load wav for the {i} time") + logger.warn(e) + time.sleep(1) + continue + else: + wav, cur_sample_rate = sf.read(wav_path) + + wav = torch.from_numpy(wav).float() + wav = self.postprocess(wav, cur_sample_rate) + return wav + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.tokenizer is not None and self.fine_tuning: + label = self.tokenizer.encode(label) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + def __getitem__(self, index): + wav = self.get_audio(index) + labels = self.get_labels(index) + return {"id": index, "source": wav, "label_list": labels} + + def __len__(self): + return len(self.wav_sizes) + + def crop_to_max_size(self, wav, target_size): + size = len(wav) + diff = size - target_size + if diff <= 0: + return wav, 0 + + start, end = 0, target_size + if self.random_crop: + start = np.random.randint(0, diff + 1) + end = size - diff + start + return wav[start:end], start + + def collater(self, samples): + # target = max(sizes) -> random_crop not used + # target = max_sample_size -> random_crop used for long + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + audios = [s["source"] for s in samples] + audio_sizes = [len(s) for s in audios] + if self.pad_audio: + audio_size = min(max(audio_sizes), self.max_sample_size) + else: + audio_size = min(min(audio_sizes), self.max_sample_size) + feat_dim = audios[0].size(-1) if audios[0].dim() > 1 
else 1 + collated_audios, padding_mask, audio_starts = self.collater_audio( + audios, audio_size, feat_dim, + ) + + targets_by_label = [ + [s["label_list"][i] for s in samples] for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, audio_size, audio_starts + ) + + if self.add_decoder_target: + if self.fine_tuning: + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + else: + if self.tokenizer is not None: + decoder_label = [ + # Set 48 for translate int to char and avoid \n + torch.cat( + ( + torch.tensor( + self.tokenizer.sp.Encode( + "".join( + [chr(j + 48) for j in ( + targets_list[0][i, :lengths_list[0][i]].unique_consecutive() if self.reduce_label_for_dec else targets_list[0][i, :lengths_list[0][i]] + ).tolist()] + ), out_type=int + ) + ), + torch.tensor([self.tgt_dict.eos()]) + ), dim=0 + ).long() + for i in range(targets_list[0].size(0)) + ] + else: + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]].unique_consecutive() if self.reduce_label_for_dec else targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + + if self.mbart_style_lang_id: + decoder_label = [ + torch.cat((decoder_label[i], torch.tensor([self.tgt_lang_idx])), 0).long() + for i in range(targets_list[0].size(0)) + ] + + dec_ntokens = sum(x.size(0) for x in decoder_label) + decoder_target = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos() if not self.mbart_style_lang_id else self.tgt_lang_idx, + left_pad=False, + move_eos_to_beginning=False, + ) + decoder_target_lengths = torch.tensor( + [x.size(0) for x in decoder_label], dtype=torch.long + ) + prev_output_tokens = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos() if not self.mbart_style_lang_id else self.tgt_lang_idx, + left_pad=False, + move_eos_to_beginning=True, + ) + + if self.tgt_lang_idx is not None and not self.mbart_style_lang_id: + assert (prev_output_tokens[:, 0] != self.tgt_dict.eos()).sum() == 0 + prev_output_tokens[:, 0] = self.tgt_lang_idx + + net_input = { + "source": collated_audios, + "padding_mask": padding_mask, + "prev_output_tokens": prev_output_tokens, + } + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + "decoder_target": decoder_target, + "decoder_target_lengths": decoder_target_lengths, + "dec_ntokens": dec_ntokens, + "lang_idx": self.tgt_lang_idx, + } + else: + net_input = {"source": collated_audios, "padding_mask": padding_mask} + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + return batch + + def collater_audio(self, audios, audio_size, feat_dim=1): + collated_audios = audios[0].new_zeros(len(audios), audio_size, feat_dim) + padding_mask = ( + torch.BoolTensor(collated_audios.shape[0:2]).fill_(False) + # if self.pad_audio else None + ) + audio_starts = [0 for _ in audios] + for i, audio in enumerate(audios): + audio = audio.view(-1, feat_dim) + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + 
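+                # diff < 0: audio is shorter than audio_size, so pad it and flag the padded tail in padding_mask;
+                # diff > 0: audio is longer, so crop it down to audio_size (randomly if self.random_crop) via crop_to_max_size.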
elif diff < 0: + assert self.pad_audio + collated_audios[i] = torch.cat([audio, audio.new_full((-diff, feat_dim), 0.0)]) + padding_mask[i, diff:] = True + else: + collated_audios[i], audio_starts[i] = self.crop_to_max_size( + audio, audio_size + ) + return collated_audios.squeeze(-1), padding_mask, audio_starts + + def collater_frm_label(self, targets, audio_size, audio_starts, label_rate, pad): + assert label_rate > 0 + s2f = label_rate / self.sample_rate + frm_starts = [int(round(s * s2f)) for s in audio_starts] + frm_size = int(round(audio_size * s2f)) + if not self.pad_audio: + rem_size = [len(t) - s for t, s in zip(targets, frm_starts)] + frm_size = min(frm_size, *rem_size) + targets = [t[s : s + frm_size] for t, s in zip(targets, frm_starts)] + logger.debug(f"audio_starts={audio_starts}") + logger.debug(f"frame_starts={frm_starts}") + logger.debug(f"frame_size={frm_size}") + + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_label(self, targets_by_label, audio_size, audio_starts): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + if label_rate == -1: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + else: + targets, lengths, ntokens = self.collater_frm_label( + targets, audio_size, audio_starts, label_rate, pad + ) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + if self.pad_audio: + return self.wav_sizes[index] + return min(self.wav_sizes[index], self.max_sample_size) + + @property + def sizes(self): + return np.array(self.wav_sizes) + + def ordered_indices(self): + """Return an ordered list of indices. 
Batches will be constructed based + on this order.""" + + if self.shuffle: + if len(self.chunk_names) > 0: + logger.info(f"ordered indices for epoch {self.epoch}") + with data_utils.numpy_seed(self.epoch): + self.chunk_order = np.random.permutation(len(self.chunk_names)) + chunk_count = 0 + tmp_sizes = [] + tmp_indices = [] + indice = [] + for i in self.chunk_order: + chunk_count += 1 + start = self.chunk_indices[i] + end = self.chunk_indices[i+1] if i < len(self.chunk_names) - 1 else len(self) + size = list(self.sizes[start:end]) + tmp_indices.extend(list(np.arange(start, end))) + tmp_sizes.extend(size) + if chunk_count % 10 == 0 or i == self.chunk_order[0]: + order = [np.random.permutation(len(tmp_indices))] + order.append( + np.minimum( + np.array(tmp_sizes), + self.max_sample_size, + ) + ) + sort_idx = np.lexsort(order)[::-1] + indice.append(np.array([tmp_indices[k] for k in sort_idx])) + tmp_indices = [] + tmp_sizes =[] + return indice + else: + order = [np.random.permutation(len(self))] + order.append( + np.minimum( + np.array(self.sizes), + self.max_sample_size, + ) + ) + return np.lexsort(order)[::-1] + else: + return np.arange(len(self)) + + def postprocess(self, wav, cur_sample_rate): + if wav.dim() == 2: + wav = wav.mean(-1) + assert wav.dim() == 1, wav.dim() + + if cur_sample_rate != self.sample_rate: + raise Exception(f"sr {cur_sample_rate} != {self.sample_rate}") + + if self.normalize: + with torch.no_grad(): + wav = F.layer_norm(wav, wav.shape) + return wav diff --git a/SpeechT5/Speech2S/speech2s/data/language_trible_dataset.py b/SpeechT5/Speech2S/speech2s/data/language_trible_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..6494127d6bb5d993d557f9f534f7cca83b0f7fa1 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/data/language_trible_dataset.py @@ -0,0 +1,669 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +import numpy as np +import torch +import os +import itertools + +from fairseq.data import FairseqDataset, data_utils +from fairseq.data import ( + AppendTokenDataset, + ConcatDataset, + PrependTokenDataset, + data_utils, + indexed_dataset, +) + +logger = logging.getLogger(__name__) + +def load_langtriple_dataset( + data_path, + split, + src, + src_dict, + ref, + ref_dict, + tgt, + tgt_dict, + combine, + dataset_impl, + upsample_primary, + left_pad_source, + left_pad_target, + max_source_positions, + max_target_positions, + prepend_bos=False, + load_alignments=False, + truncate_source=False, + append_source_id=False, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + prepend_bos_src=None, + lang_format="[{}]", +): + assert not truncate_source + def split_exists(split, src, ref, tgt, lang, data_path): + filename = os.path.join(data_path, "{}.{}-{}-{}.{}".format(split, src, ref, tgt, lang)) + return indexed_dataset.dataset_exists(filename, impl=dataset_impl) + + src_datasets = [] + ref_datasets = [] + tgt_datasets = [] + + for k in itertools.count(): + split_k = split + (str(k) if k > 0 else "") + + # infer langcode + if split_exists(split_k, src, ref, tgt, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}-{}.".format(split_k, src, ref, tgt)) + elif split_exists(split_k, tgt, ref, src, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}-{}.".format(split_k, tgt, 
ref, src)) + else: + if k > 0: + break + else: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(split, data_path) + ) + + src_dataset = data_utils.load_indexed_dataset( + prefix + src, src_dict, dataset_impl + ) + src_datasets.append(src_dataset) + + ref_dataset = data_utils.load_indexed_dataset( + prefix + ref, ref_dict, dataset_impl + ) + ref_datasets.append(ref_dataset) + + tgt_dataset = data_utils.load_indexed_dataset( + prefix + tgt, tgt_dict, dataset_impl + ) + if tgt_dataset is not None: + tgt_datasets.append(tgt_dataset) + + logger.info( + "{} {} {}-{}-{} {} examples".format( + data_path, split_k, src, ref, tgt, len(src_datasets[-1]) + ) + ) + + if not combine: + break + + assert len(src_datasets) == len(ref_datasets) + assert len(src_datasets) == len(tgt_datasets) or len(tgt_datasets) == 0 + + if len(src_datasets) == 1: + src_dataset = src_datasets[0] + ref_dataset = ref_datasets[0] + tgt_dataset = tgt_datasets[0] if len(tgt_datasets) > 0 else None + else: + sample_ratios = [1] * len(src_datasets) + sample_ratios[0] = upsample_primary + src_dataset = ConcatDataset(src_datasets, sample_ratios) + ref_dataset = ConcatDataset(ref_datasets, sample_ratios) + if len(tgt_datasets) > 0: + tgt_dataset = ConcatDataset(tgt_datasets, sample_ratios) + else: + tgt_dataset = None + + if prepend_bos: + assert hasattr(src_dict, "bos_index") and hasattr(ref_dict, "bos_index") and hasattr(tgt_dict, "bos_index") + src_dataset = PrependTokenDataset(src_dataset, src_dict.bos()) + ref_dataset = PrependTokenDataset(ref_dataset, ref_dict.bos()) + if tgt_dataset is not None: + tgt_dataset = PrependTokenDataset(tgt_dataset, tgt_dict.bos()) + elif prepend_bos_src is not None: + logger.info(f"prepending src bos: {prepend_bos_src}") + src_dataset = PrependTokenDataset(src_dataset, prepend_bos_src) + ref_dataset = PrependTokenDataset(ref_dataset, prepend_bos_src) + + eos = None + if append_source_id: + src_dataset = AppendTokenDataset( + src_dataset, src_dict.index(lang_format.format(src)) + ) + ref_dataset = AppendTokenDataset( + ref_dataset, ref_dict.index(lang_format.format(ref)) + ) + if tgt_dataset is not None: + tgt_dataset = AppendTokenDataset( + tgt_dataset, tgt_dict.index(lang_format.format(tgt)) + ) + eos = tgt_dict.index(lang_format.format(tgt)) + + align_dataset = None + if load_alignments: + align_path = os.path.join(data_path, "{}.align.{}-{}".format(split, src, tgt)) + if indexed_dataset.dataset_exists(align_path, impl=dataset_impl): + align_dataset = data_utils.load_indexed_dataset( + align_path, None, dataset_impl + ) + + tgt_dataset_sizes = tgt_dataset.sizes if tgt_dataset is not None else None + return LanguageTripleDataset( + src_dataset, + src_dataset.sizes, + src_dict, + ref_dataset, + ref_dataset.sizes, + ref_dict, + tgt_dataset, + tgt_dataset_sizes, + tgt_dict, + left_pad_source=left_pad_source, + left_pad_target=left_pad_target, + align_dataset=align_dataset, + eos=eos, + num_buckets=num_buckets, + shuffle=shuffle, + pad_to_multiple=pad_to_multiple, + ) + + +def collate( + samples, + pad_idx, + eos_idx, + left_pad_source=True, + left_pad_target=False, + input_feeding=True, + pad_to_length=None, + pad_to_multiple=1, +): + if len(samples) == 0: + return {} + + def merge(key, left_pad, move_eos_to_beginning=False, pad_to_length=None): + return data_utils.collate_tokens( + [s[key] for s in samples], + pad_idx, + None, + left_pad, + move_eos_to_beginning, + pad_to_length=pad_to_length, + pad_to_multiple=pad_to_multiple, + ) + + def check_alignment(alignment, src_len, tgt_len): 
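+        # Drop alignments that are empty or whose indices fall outside the source/target lengths.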
+ if alignment is None or len(alignment) == 0: + return False + if ( + alignment[:, 0].max().item() >= src_len - 1 + or alignment[:, 1].max().item() >= tgt_len - 1 + ): + logger.warning("alignment size mismatch found, skipping alignment!") + return False + return True + + def compute_alignment_weights(alignments): + """ + Given a tensor of shape [:, 2] containing the source-target indices + corresponding to the alignments, a weight vector containing the + inverse frequency of each target index is computed. + For e.g. if alignments = [[5, 7], [2, 3], [1, 3], [4, 2]], then + a tensor containing [1., 0.5, 0.5, 1] should be returned (since target + index 3 is repeated twice) + """ + align_tgt = alignments[:, 1] + _, align_tgt_i, align_tgt_c = torch.unique( + align_tgt, return_inverse=True, return_counts=True + ) + align_weights = align_tgt_c[align_tgt_i[np.arange(len(align_tgt))]] + return 1.0 / align_weights.float() + + id = torch.LongTensor([s["id"] for s in samples]) + src_tokens = merge( + "source", + left_pad=left_pad_source, + pad_to_length=pad_to_length["source"] if pad_to_length is not None else None, + ) + ref_tokens = merge( + "reference", + left_pad=left_pad_source, + pad_to_length=pad_to_length["source"] if pad_to_length is not None else None, + ) + # sort by descending source length + src_lengths = torch.LongTensor( + [s["source"].ne(pad_idx).long().sum() for s in samples] + ) + ref_lengths = torch.LongTensor( + [s["reference"].ne(pad_idx).long().sum() for s in samples] + ) + src_lengths, sort_order = src_lengths.sort(descending=True) + id = id.index_select(0, sort_order) + src_tokens = src_tokens.index_select(0, sort_order) + ref_lengths = ref_lengths.index_select(0, sort_order) + ref_tokens = ref_tokens.index_select(0, sort_order) + + prev_output_tokens = None + target = None + if samples[0].get("target", None) is not None: + target = merge( + "target", + left_pad=left_pad_target, + pad_to_length=pad_to_length["target"] + if pad_to_length is not None + else None, + ) + target = target.index_select(0, sort_order) + tgt_lengths = torch.LongTensor( + [s["target"].ne(pad_idx).long().sum() for s in samples] + ).index_select(0, sort_order) + ntokens = tgt_lengths.sum().item() + + if samples[0].get("prev_output_tokens", None) is not None: + prev_output_tokens = merge("prev_output_tokens", left_pad=left_pad_target) + elif input_feeding: + # we create a shifted version of targets for feeding the + # previous output token(s) into the next decoder step + prev_output_tokens = merge( + "target", + left_pad=left_pad_target, + move_eos_to_beginning=True, + pad_to_length=pad_to_length["target"] + if pad_to_length is not None + else None, + ) + else: + ntokens = src_lengths.sum().item() + + batch = { + "id": id, + "nsentences": len(samples), + "ntokens": ntokens, + "net_input": { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + }, + "target": target, + "ref_tokens": ref_tokens, + "ref_lengths": ref_lengths, + } + if prev_output_tokens is not None: + batch["net_input"]["prev_output_tokens"] = prev_output_tokens.index_select( + 0, sort_order + ) + + if samples[0].get("alignment", None) is not None: + bsz, tgt_sz = batch["target"].shape + src_sz = batch["net_input"]["src_tokens"].shape[1] + + offsets = torch.zeros((len(sort_order), 2), dtype=torch.long) + offsets[:, 1] += torch.arange(len(sort_order), dtype=torch.long) * tgt_sz + if left_pad_source: + offsets[:, 0] += src_sz - src_lengths + if left_pad_target: + offsets[:, 1] += tgt_sz - tgt_lengths + + alignments = [ + alignment + 
offset + for align_idx, offset, src_len, tgt_len in zip( + sort_order, offsets, src_lengths, tgt_lengths + ) + for alignment in [samples[align_idx]["alignment"].view(-1, 2)] + if check_alignment(alignment, src_len, tgt_len) + ] + + if len(alignments) > 0: + alignments = torch.cat(alignments, dim=0) + align_weights = compute_alignment_weights(alignments) + + batch["alignments"] = alignments + batch["align_weights"] = align_weights + + if samples[0].get("constraints", None) is not None: + # Collate the packed constraints across the samples, padding to + # the length of the longest sample. + lens = [sample.get("constraints").size(0) for sample in samples] + max_len = max(lens) + constraints = torch.zeros((len(samples), max(lens))).long() + for i, sample in enumerate(samples): + constraints[i, 0 : lens[i]] = samples[i].get("constraints") + batch["constraints"] = constraints.index_select(0, sort_order) + + return batch + + +class LanguageTripleDataset(FairseqDataset): + """ + A pair of torch.utils.data.Datasets. + + Args: + src (torch.utils.data.Dataset): source dataset to wrap + src_sizes (List[int]): source sentence lengths + src_dict (~fairseq.data.Dictionary): source vocabulary + tgt (torch.utils.data.Dataset, optional): target dataset to wrap + tgt_sizes (List[int], optional): target sentence lengths + tgt_dict (~fairseq.data.Dictionary, optional): target vocabulary + left_pad_source (bool, optional): pad source tensors on the left side + (default: True). + left_pad_target (bool, optional): pad target tensors on the left side + (default: False). + shuffle (bool, optional): shuffle dataset elements before batching + (default: True). + input_feeding (bool, optional): create a shifted version of the targets + to be passed into the model for teacher forcing (default: True). + remove_eos_from_source (bool, optional): if set, removes eos from end + of source if it's present (default: False). + append_eos_to_target (bool, optional): if set, appends eos to end of + target if it's absent (default: False). + align_dataset (torch.utils.data.Dataset, optional): dataset + containing alignments. + constraints (Tensor, optional): 2d tensor with a concatenated, zero- + delimited list of constraints for each sentence. + append_bos (bool, optional): if set, appends bos to the beginning of + source/target sentence. + num_buckets (int, optional): if set to a value greater than 0, then + batches will be bucketed into the given number of batch shapes. + src_lang_id (int, optional): source language ID, if set, the collated batch + will contain a field 'src_lang_id' in 'net_input' which indicates the + source language of the samples. + tgt_lang_id (int, optional): target language ID, if set, the collated batch + will contain a field 'tgt_lang_id' which indicates the target language + of the samples. 
+ """ + + def __init__( + self, + src, + src_sizes, + src_dict, + ref, + ref_sizes, + ref_dict, + tgt=None, + tgt_sizes=None, + tgt_dict=None, + left_pad_source=True, + left_pad_target=False, + shuffle=True, + input_feeding=True, + remove_eos_from_source=False, + append_eos_to_target=False, + align_dataset=None, + constraints=None, + append_bos=False, + eos=None, + num_buckets=0, + src_lang_id=None, + tgt_lang_id=None, + pad_to_multiple=1, + ): + if tgt_dict is not None: + assert src_dict.pad() == tgt_dict.pad() + assert src_dict.eos() == tgt_dict.eos() + assert src_dict.unk() == tgt_dict.unk() + if tgt is not None: + assert len(src) == len( + tgt + ), "Source and target must contain the same number of examples" + assert len(src) == len( + ref + ), "Source and reference must contain the same number of examples" + self.src = src + self.ref = ref + self.tgt = tgt + self.src_sizes = np.array(src_sizes) + self.ref_sizes = np.array(ref_sizes) + self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None + self.sizes = ( + np.vstack((self.src_sizes, self.tgt_sizes)).T + if self.tgt_sizes is not None + else self.src_sizes + ) + self.src_dict = src_dict + self.ref_dict = ref_dict + self.tgt_dict = tgt_dict + self.left_pad_source = left_pad_source + self.left_pad_target = left_pad_target + self.shuffle = shuffle + self.input_feeding = input_feeding + self.remove_eos_from_source = remove_eos_from_source + self.append_eos_to_target = append_eos_to_target + self.align_dataset = align_dataset + if self.align_dataset is not None: + assert ( + self.tgt_sizes is not None + ), "Both source and target needed when alignments are provided" + self.constraints = constraints + self.append_bos = append_bos + self.eos = eos if eos is not None else src_dict.eos() + self.src_lang_id = src_lang_id + self.tgt_lang_id = tgt_lang_id + if num_buckets > 0: + from fairseq.data import BucketPadLengthDataset + + self.src = BucketPadLengthDataset( + self.src, + sizes=self.src_sizes, + num_buckets=num_buckets, + pad_idx=self.src_dict.pad(), + left_pad=self.left_pad_source, + ) + self.src_sizes = self.src.sizes + logger.info("bucketing source lengths: {}".format(list(self.src.buckets))) + self.ref = BucketPadLengthDataset( + self.ref, + sizes=self.ref_sizes, + num_buckets=num_buckets, + pad_idx=self.ref_dict.pad(), + left_pad=self.left_pad_source, + ) + self.ref_sizes = self.ref.sizes + logger.info("bucketing reference lengths: {}".format(list(self.src.buckets))) + if self.tgt is not None: + self.tgt = BucketPadLengthDataset( + self.tgt, + sizes=self.tgt_sizes, + num_buckets=num_buckets, + pad_idx=self.tgt_dict.pad(), + left_pad=self.left_pad_target, + ) + self.tgt_sizes = self.tgt.sizes + logger.info( + "bucketing target lengths: {}".format(list(self.tgt.buckets)) + ) + + # determine bucket sizes using self.num_tokens, which will return + # the padded lengths (thanks to BucketPadLengthDataset) + num_tokens = np.vectorize(self.num_tokens, otypes=[np.compat.long]) + self.bucketed_num_tokens = num_tokens(np.arange(len(self.src))) + self.buckets = [ + (None, num_tokens) for num_tokens in np.unique(self.bucketed_num_tokens) + ] + else: + self.buckets = None + self.pad_to_multiple = pad_to_multiple + + def get_batch_shapes(self): + return self.buckets + + def __getitem__(self, index): + tgt_item = self.tgt[index] if self.tgt is not None else None + src_item = self.src[index] + ref_item = self.ref[index] + # Append EOS to end of tgt sentence if it does not have an EOS and remove + # EOS from end of src sentence if it 
exists. This is useful when we use + # use existing datasets for opposite directions i.e., when we want to + # use tgt_dataset as src_dataset and vice versa + if self.append_eos_to_target: + eos = self.tgt_dict.eos() if self.tgt_dict else self.src_dict.eos() + if self.tgt and self.tgt[index][-1] != eos: + tgt_item = torch.cat([self.tgt[index], torch.LongTensor([eos])]) + + if self.append_bos: + bos = self.tgt_dict.bos() if self.tgt_dict else self.src_dict.bos() + if self.tgt and self.tgt[index][0] != bos: + tgt_item = torch.cat([torch.LongTensor([bos]), self.tgt[index]]) + + bos = self.src_dict.bos() + if self.src[index][0] != bos: + src_item = torch.cat([torch.LongTensor([bos]), self.src[index]]) + if self.ref[index][0] != bos: + ref_item = torch.cat([torch.LongTensor([bos]), self.ref[index]]) + + if self.remove_eos_from_source: + eos = self.src_dict.eos() + if self.src[index][-1] == eos: + src_item = self.src[index][:-1] + if self.ref[index][-1] == eos: + ref_item = self.ref[index][:-1] + + example = { + "id": index, + "source": src_item, + "reference": ref_item, + "target": tgt_item, + } + if self.align_dataset is not None: + example["alignment"] = self.align_dataset[index] + if self.constraints is not None: + example["constraints"] = self.constraints[index] + return example + + def __len__(self): + return len(self.src) + + def collater(self, samples, pad_to_length=None): + """Merge a list of samples to form a mini-batch. + + Args: + samples (List[dict]): samples to collate + pad_to_length (dict, optional): a dictionary of + {'source': source_pad_to_length, 'target': target_pad_to_length} + to indicate the max length to pad to in source and target respectively. + + Returns: + dict: a mini-batch with the following keys: + + - `id` (LongTensor): example IDs in the original input order + - `ntokens` (int): total number of tokens in the batch + - `net_input` (dict): the input to the Model, containing keys: + + - `src_tokens` (LongTensor): a padded 2D Tensor of tokens in + the source sentence of shape `(bsz, src_len)`. Padding will + appear on the left if *left_pad_source* is ``True``. + - `src_lengths` (LongTensor): 1D Tensor of the unpadded + lengths of each source sentence of shape `(bsz)` + - `prev_output_tokens` (LongTensor): a padded 2D Tensor of + tokens in the target sentence, shifted right by one + position for teacher forcing, of shape `(bsz, tgt_len)`. + This key will not be present if *input_feeding* is + ``False``. Padding will appear on the left if + *left_pad_target* is ``True``. + - `src_lang_id` (LongTensor): a long Tensor which contains source + language IDs of each sample in the batch + + - `target` (LongTensor): a padded 2D Tensor of tokens in the + target sentence of shape `(bsz, tgt_len)`. Padding will appear + on the left if *left_pad_target* is ``True``. 
+ - `tgt_lang_id` (LongTensor): a long Tensor which contains target language + IDs of each sample in the batch + """ + res = collate( + samples, + pad_idx=self.src_dict.pad(), + eos_idx=self.eos, + left_pad_source=self.left_pad_source, + left_pad_target=self.left_pad_target, + input_feeding=self.input_feeding, + pad_to_length=pad_to_length, + pad_to_multiple=self.pad_to_multiple, + ) + if self.src_lang_id is not None or self.tgt_lang_id is not None: + src_tokens = res["net_input"]["src_tokens"] + bsz = src_tokens.size(0) + if self.src_lang_id is not None: + res["net_input"]["src_lang_id"] = ( + torch.LongTensor([[self.src_lang_id]]).expand(bsz, 1).to(src_tokens) + ) + if self.tgt_lang_id is not None: + res["tgt_lang_id"] = ( + torch.LongTensor([[self.tgt_lang_id]]).expand(bsz, 1).to(src_tokens) + ) + return res + + def num_tokens(self, index): + """Return the number of tokens in a sample. This value is used to + enforce ``--max-tokens`` during batching.""" + return max( + self.src_sizes[index], + self.tgt_sizes[index] if self.tgt_sizes is not None else 0, + ) + + def num_tokens_vec(self, indices): + """Return the number of tokens for a set of positions defined by indices. + This value is used to enforce ``--max-tokens`` during batching.""" + sizes = self.src_sizes[indices] + if self.tgt_sizes is not None: + sizes = np.maximum(sizes, self.tgt_sizes[indices]) + return sizes + + def size(self, index): + """Return an example's size as a float or tuple. This value is used when + filtering a dataset with ``--max-positions``.""" + return ( + self.src_sizes[index], + self.tgt_sizes[index] if self.tgt_sizes is not None else 0, + ) + + def ordered_indices(self): + """Return an ordered list of indices. Batches will be constructed based + on this order.""" + if self.shuffle: + indices = np.random.permutation(len(self)).astype(np.int64) + else: + indices = np.arange(len(self), dtype=np.int64) + if self.buckets is None: + # sort by target length, then source length + if self.tgt_sizes is not None: + indices = indices[np.argsort(self.tgt_sizes[indices], kind="mergesort")] + return indices[np.argsort(self.src_sizes[indices], kind="mergesort")] + else: + # sort by bucketed_num_tokens, which is: + # max(padded_src_len, padded_tgt_len) + return indices[ + np.argsort(self.bucketed_num_tokens[indices], kind="mergesort") + ] + + @property + def supports_prefetch(self): + return getattr(self.src, "supports_prefetch", False) and ( + getattr(self.tgt, "supports_prefetch", False) or self.tgt is None + ) + + def prefetch(self, indices): + self.src.prefetch(indices) + if self.tgt is not None: + self.tgt.prefetch(indices) + if self.align_dataset is not None: + self.align_dataset.prefetch(indices) + + def filter_indices_by_size(self, indices, max_sizes): + """Filter a list of sample indices. Remove those that are longer + than specified in max_sizes. 
+ + Args: + indices (np.array): original array of sample indices + max_sizes (int or list[int] or tuple[int]): max sample size, + can be defined separately for src and tgt (then list or tuple) + + Returns: + np.array: filtered sample array + list: list of removed indices + """ + return data_utils.filter_paired_dataset_indices_by_size( + self.src_sizes, + self.tgt_sizes, + indices, + max_sizes, + ) diff --git a/SpeechT5/Speech2S/speech2s/data/load_langpair_dataset.py b/SpeechT5/Speech2S/speech2s/data/load_langpair_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd204598e67d41a5688e16b0835f96fd40cf384 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/data/load_langpair_dataset.py @@ -0,0 +1,172 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/272c4c5197250997148fb12c0db6306035f166a4/fairseq/tasks/translation.py + 1. Add custom lang_format in function load_langpair_dataset + 2. If truncate_source (default no), use RandomCropDataset instead of TruncateDataset +""" + +import itertools +import logging +import os + +from fairseq.data import ( + AppendTokenDataset, + LanguagePairDataset, + PrependTokenDataset, + StripTokenDataset, + TruncateDataset, + RandomCropDataset, + data_utils, + indexed_dataset, +) + +from speechut.data.concat_dataset import ConcatDataset + + +EVAL_BLEU_ORDER = 4 + + +logger = logging.getLogger(__name__) + + +def load_langpair_dataset( + data_path, + split, + src, + src_dict, + tgt, + tgt_dict, + combine, + dataset_impl, + upsample_primary, + left_pad_source, + left_pad_target, + max_source_positions, + max_target_positions, + prepend_bos=False, + load_alignments=False, + truncate_source=False, + append_source_id=False, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + prepend_bos_src=None, + lang_format="[{}]", + input_feeding=True, +): + def split_exists(split, src, tgt, lang, data_path): + filename = os.path.join(data_path, "{}.{}-{}.{}".format(split, src, tgt, lang)) + return indexed_dataset.dataset_exists(filename, impl=dataset_impl) + + src_datasets = [] + tgt_datasets = [] + + for k in itertools.count(): + split_k = split + (str(k) if k > 0 else "") + + # infer langcode + if split_exists(split_k, src, tgt, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, src, tgt)) + elif split_exists(split_k, tgt, src, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, tgt, src)) + else: + if k > 0: + break + else: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(split, data_path) + ) + + src_dataset = data_utils.load_indexed_dataset( + prefix + src, src_dict, dataset_impl + ) + if truncate_source: + src_dataset = AppendTokenDataset( + RandomCropDataset( + StripTokenDataset(src_dataset, src_dict.eos()), + max_source_positions - 1, + ), + src_dict.eos(), + ) + src_datasets.append(src_dataset) + + tgt_dataset = data_utils.load_indexed_dataset( + prefix + tgt, tgt_dict, dataset_impl + ) + if tgt_dataset is not None: + tgt_datasets.append(tgt_dataset) + + logger.info( + "{} {} {}-{} {} examples".format( + data_path, split_k, src, tgt, len(src_datasets[-1]) + ) + ) + + if not combine: + break + + assert len(src_datasets) == len(tgt_datasets) or len(tgt_datasets) == 0 
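+    # Multiple split shards (train, train1, ...) are concatenated; only the primary (first) shard is upsampled by upsample_primary.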
+ + if len(src_datasets) == 1: + src_dataset = src_datasets[0] + tgt_dataset = tgt_datasets[0] if len(tgt_datasets) > 0 else None + else: + sample_ratios = [1] * len(src_datasets) + sample_ratios[0] = upsample_primary + src_dataset = ConcatDataset(src_datasets, sample_ratios) + if len(tgt_datasets) > 0: + tgt_dataset = ConcatDataset(tgt_datasets, sample_ratios) + else: + tgt_dataset = None + + if prepend_bos: + assert hasattr(src_dict, "bos_index") and hasattr(tgt_dict, "bos_index") + src_dataset = PrependTokenDataset(src_dataset, src_dict.bos()) + if tgt_dataset is not None: + tgt_dataset = PrependTokenDataset(tgt_dataset, tgt_dict.bos()) + elif prepend_bos_src is not None: + logger.info(f"prepending src bos: {prepend_bos_src}") + src_dataset = PrependTokenDataset(src_dataset, prepend_bos_src) + + eos = None + if append_source_id: + src_dataset = AppendTokenDataset( + src_dataset, src_dict.index(lang_format.format(src)) + ) + if tgt_dataset is not None: + tgt_dataset = AppendTokenDataset( + tgt_dataset, tgt_dict.index(lang_format.format(tgt)) + ) + eos = tgt_dict.index(lang_format.format(tgt)) + + align_dataset = None + if load_alignments: + align_path = os.path.join(data_path, "{}.align.{}-{}".format(split, src, tgt)) + if indexed_dataset.dataset_exists(align_path, impl=dataset_impl): + align_dataset = data_utils.load_indexed_dataset( + align_path, None, dataset_impl + ) + + tgt_dataset_sizes = tgt_dataset.sizes if tgt_dataset is not None else None + return LanguagePairDataset( + src_dataset, + src_dataset.sizes, + src_dict, + tgt_dataset, + tgt_dataset_sizes, + tgt_dict, + left_pad_source=left_pad_source, + left_pad_target=left_pad_target, + align_dataset=align_dataset, + eos=eos, + num_buckets=num_buckets, + shuffle=shuffle, + pad_to_multiple=pad_to_multiple, + input_feeding=input_feeding, + ) diff --git a/SpeechT5/Speech2S/speech2s/data/multimodal_corpus_dataset.py b/SpeechT5/Speech2S/speech2s/data/multimodal_corpus_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..19a6f8962757dec9b32430a98cd6e850d1f30d19 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/data/multimodal_corpus_dataset.py @@ -0,0 +1,368 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +from os import replace +import time +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import numpy as np +from fairseq.data import data_utils + +from fairseq.data import FairseqDataset + +logger = logging.getLogger(__name__) + + +class MultiCorpusDataset(FairseqDataset): + """ + see fairseq/fairseq/data/multi_corpus_dataset.__doc__ + + Args: + datasets: a OrderedDict of FairseqDataset instances. 
+ distribution: a List containing the probability of getting an utterance from + corresponding dataset + seed: random seed for sampling the datsets + sort_indices: if true, will sort the ordered indices by size + batch_sample: if true, will ensure each batch is from a single dataset + """ + + def __init__( + self, + datasets: Dict[str, FairseqDataset], + max_positions: Dict, + distribution: List[float], + max_tokens_ratio: List[float], + seed: int = 1234, + sort_indices: bool = False, + check_length: bool = False, + ): + super().__init__() + assert isinstance(datasets, OrderedDict) + assert len(datasets) == len(distribution) + # assert sum(distribution) == 1 + self.datasets = datasets + self.distribution = distribution + self.max_tokens_ratio = max_tokens_ratio + self.seed = seed + self.sort_indices = sort_indices + self.max_positions = max_positions + self.check_length = check_length + + # Avoid repeated conversions to list later + self.dataset_list = list(datasets.values()) + self.total_num_instances = 0 + + # first_dataset = self.dataset_list[0] + + self.num_instances_per_dataset = [] + self.dataset_offsets = [] + for i, dataset in enumerate(self.dataset_list): + assert isinstance(dataset, FairseqDataset) + # assert type(dataset) is type(first_dataset) + self.num_instances_per_dataset.append( + 0 if self.distribution[i] == 0 else len(dataset) + ) + self.dataset_offsets.append(self.total_num_instances) + self.total_num_instances += self.num_instances_per_dataset[i] + + def ordered_indices(self): + start = time.time() + with data_utils.numpy_seed(self.seed, self.epoch): + logger.info(f"sampling new dataset with seed {self.seed} epoch {self.epoch}") + sampled_indices = {} + + # For each dataset i, sample self.distribution[i] * self.total_num_instances + for i, key in enumerate(self.datasets): + tp = time.time() + if self.distribution[i] == 0: + # skip dataset if sampling probability is 0 + continue + + if i < len(self.datasets) - 1: + num_instances = int(self.distribution[i] * self.total_num_instances) + high = self.dataset_offsets[i + 1] + else: + num_instances = int(self.distribution[i] * self.total_num_instances) + high = self.total_num_instances + + logger.info(f"sampling {num_instances} from {key} dataset") + + # First, add k copies of the dataset where k = num_instances // len(dataset). + # This ensures an equal distribution of the data points as much as possible. 
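+                # e.g. num_instances=25 and len(dataset)=10 -> 2 full copies plus 5 extra indices sampled without replacement.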
+ # For the remaining entries randomly sample them + dataset_size = len(self.datasets[key]) + num_copies = num_instances // dataset_size + dataset_indices = np.random.permutation(high - self.dataset_offsets[i])[: num_instances - num_copies * dataset_size] + if num_copies > 0: + dataset_indices = np.concatenate( + ( + np.repeat( + np.arange(high - self.dataset_offsets[i]), num_copies + ), + dataset_indices, + ) + ) + # filter by size, we should ignore it by setting check_length=False + # , as it is very time-consuming on large dadaset + if self.max_positions[key] is not None and self.check_length: + dataset_indices, ignored = self.datasets[key].filter_indices_by_size( + dataset_indices, + self.max_positions[key], + ) + if len(ignored) > 0: + logger.warning( + ( + "{:,} samples have invalid sizes and will be skipped, " + "max_positions={}, first few sample ids={}" + ).format(len(ignored), self.max_positions[key], ignored[:10]) + ) + + if self.sort_indices: + logger.info(" - sampled indices took {}s".format(time.time() - tp)) + tp = time.time() + dataset_indices = np.sort(dataset_indices) + ordered_indices = self.datasets[key].ordered_indices() + if isinstance(ordered_indices[0], np.ndarray): # chunked audio data + dataset_indices = [order_idx + self.dataset_offsets[i] for order_idx in ordered_indices] + assert self.dataset_offsets[i] == 0 + # TODO for chunked audio data, now assume len(dataset_indices) == len(dataset). Don't filter any data. + else: + dataset_indices = ordered_indices[dataset_indices] + self.dataset_offsets[i] + logger.info(" - ordered_indices took {}s".format(time.time() - tp)) + else: + np.random.shuffle(dataset_indices) + + sampled_indices[key] = dataset_indices + + logger.info( + "multi_corpus_dataset ordered_indices took {}s".format( + time.time() - start + ) + ) + return sampled_indices + + def _map_index(self, index: int): + """ + If dataset A has length N and dataset B has length M + then index 1 maps to index 1 of dataset A, and index N + 1 + maps to index 1 of B. + """ + counter = 0 + for num_instances, key in zip(self.num_instances_per_dataset, self.datasets): + if index < counter + num_instances: + return index - counter, key + counter += num_instances + raise ValueError( + "Invalid index: {}, max: {}".format(index, self.total_num_instances) + ) + + def __len__(self): + """ + Length of this dataset is the sum of individual datasets + """ + return self.total_num_instances + + def __getitem__(self, index): + new_index, key = self._map_index(index) + try: + item = self.datasets[key][new_index] + item["full_id"] = index + return item + except Exception as e: + e.args = (f"Error from {key} dataset", *e.args) + raise + + def collater(self, samples): + """ + If we are doing batch sampling, then pick the right collater to use. + + Otherwise we assume all collaters are the same. 
+ """ + if len(samples) == 0: + return None + + samples_dict = {key: [] for key in self.datasets} + for s in samples: + _, key = self._map_index(s["full_id"]) + samples_dict[key].append(s) + + batch = {} + for key in samples_dict: + if len(samples_dict[key]) == 0: + continue + batch[key] = self.datasets[key].collater(samples_dict[key]) + + return batch + + + def num_tokens(self, index: int): + index, key = self._map_index(index) + return self.datasets[key].num_tokens(index) + + def size(self, index: int): + index, key = self._map_index(index) + return self.datasets[key].size(index) + + @property + def can_reuse_epoch_itr_across_epochs(self): + return False + + def set_epoch(self, epoch, **unused): + super().set_epoch(epoch) + logger.info(f"setting epoch of multi_corpus_dataset to {epoch}") + for ds in self.dataset_list: + if hasattr(ds, "set_epoch"): + ds.set_epoch(epoch) + self.epoch = epoch + + @property + def supports_prefetch(self): + return False + + @property + def supports_fetch_outside_dataloader(self): + return all( + self.datasets[key].supports_fetch_outside_dataloader + for key in self.datasets + ) + + + def batch_by_size( + self, + indices, + max_tokens=None, + max_sentences=None, + required_batch_size_multiple=1, + ): + dataset_indices = indices + batches_dict = {} + for n, key in enumerate(dataset_indices): + max_tokens_ratio = self.max_tokens_ratio[n] + if isinstance(dataset_indices[key][0], np.ndarray): # chunked audio data + cur_batches = self.datasets[key].batch_by_size( + dataset_indices[key], + round(max_tokens * max_tokens_ratio), + max_sentences, + required_batch_size_multiple, + ) + logger.info(f"Created {sum([len(b) for b in cur_batches])} [{len(cur_batches)}] batches for dataset {key}") + else: + cur_batches = super().batch_by_size( + np.array(dataset_indices[key], dtype=np.int64), + round(max_tokens * max_tokens_ratio), + max_sentences, + required_batch_size_multiple, + ) + logger.info(f"Created {len(cur_batches)} batches for dataset {key}") + batches_dict[key] = cur_batches + + return batches_dict + + + def get_batch_sampler( + self, + indices, + num_shards, + seed, + max_tokens=None, + max_sentences=None, + required_batch_size_multiple=1, + split_modality_batch=False, + ): + + def batch_sampler(dataset, epoch): + start = time.time() + batches_dict = dataset.batch_by_size( + indices, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + ) + logger.info(f"multi_corpus_dataset, batch_by_size took {time.time() - start}s") + start = time.time() + new_batches = [] + + ### shuffle inner group size, split into speech/text batches + shuffled_batches_list = [] + speech_batches = [] + ### we should specify the speech_batches because: we need concatenate different speech datasets + # (e.g. ltr or km) instead of loading them parellelly. 
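+            # Non-chunked batches from every "speech*" dataset are pooled into speech_batches and shuffled together below.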
+ for name, batches in batches_dict.items(): + if name.startswith("speech"): + if isinstance(batches[0], list): # chunked audio data + batches = self.datasets[name].shuffle_batches(list(batches), seed + epoch) + shuffled_batches_list.append(batches) + else: + batches = inner_bucket_shuffle(batches, seed+epoch, num_shards*10) + batches = batches[: (len(batches) // num_shards) * num_shards] + if len(batches) == 0: + logger.warning(f"Sample 0 batch for {name}, you should ensure that no {name} data provided.") + else: + speech_batches += batches + else: + batches = inner_bucket_shuffle(batches, seed+epoch, num_shards*10) + batches = batches[: (len(batches) // num_shards) * num_shards] + if len(batches) == 0: + logger.warning(f"Sample 0 batch for {name}, you should ensure that no {name} data provided.") + else: + batches = shuffle_buckets(batches, seed=seed+epoch, inner_shuf=False) + shuffled_batches_list.append(batches) + if len(speech_batches) > 0: + speech_batches = shuffle_buckets(speech_batches, seed=seed+epoch, inner_shuf=False) + shuffled_batches_list.append(speech_batches) + + ### create the final new_batches + num_batch = min(len(batches) for batches in shuffled_batches_list) + if split_modality_batch: + for i in range(0, num_batch, num_shards): + for batches in shuffled_batches_list: + new_batches += batches[i: i + num_shards] + else: + for i in range(num_batch): + new_batches.append(np.concatenate([batches[i] for batches in shuffled_batches_list])) + + logger.info(f"multi_corpus_dataset sample {len(new_batches)} batches, took {time.time() - start}s") + return new_batches + + def inner_bucket_shuffle(batches, seed, bucket_size=10, thr=0): + """we assert batches is sorted form long to short. + shuffle samples in a buctet(e.g. 10 batches). + batches: a list of numpy array""" + num_batch = len(batches) + new_batches = [] + num_buckets = len(batches) // bucket_size + i = 0 + while i < num_batch: + if (i < bucket_size * thr or + i >= bucket_size * (num_buckets - thr) + ): + new_batches.append(batches[i]) + i += 1 + else: + group = np.concatenate(batches[i: i+bucket_size]) + with data_utils.numpy_seed(seed): + np.random.shuffle(group) + new_batches += np.array_split(group, bucket_size) + i += bucket_size + assert all([len(batch) > 0 for batch in new_batches]) + return new_batches + + def shuffle_buckets(batches, seed, inner_shuf=True): + if inner_shuf: + batches = inner_bucket_shuffle(batches, seed, num_shards*10) + batches = [batches[i: i + num_shards] for i in range(0, len(batches)-num_shards+1, num_shards)] + assert len(batches[-1]) == num_shards + new_batches = [] + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + for group in batches: + new_batches += group + return new_batches + + return batch_sampler diff --git a/SpeechT5/Speech2S/speech2s/models/__init__.py b/SpeechT5/Speech2S/speech2s/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SpeechT5/Speech2S/speech2s/models/speechut.py b/SpeechT5/Speech2S/speech2s/models/speechut.py new file mode 100644 index 0000000000000000000000000000000000000000..cb668286c1c1c420d0c7d7b9e74a3bca17c6c871 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/models/speechut.py @@ -0,0 +1,785 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: 
https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from fairseq import utils, checkpoint_utils +from fairseq.data.data_utils import compute_mask_indices +from fairseq.data.dictionary import Dictionary +from fairseq.dataclass import ChoiceEnum +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.transformer import Embedding +from fairseq.file_io import PathManager +from torch import Tensor +from fairseq.models.wav2vec.wav2vec2 import ConvFeatureExtractionModel +from fairseq.modules import GradMultiply, LayerNorm +from fairseq.tasks.hubert_pretraining import ( + HubertPretrainingConfig, + HubertPretrainingTask, +) +from fairseq.models.hubert import HubertConfig +from fairseq.models.transformer import TransformerConfig +from speechut.modules import TransformerEncoder +from speechut.modules import TransformerEncoderBase +from speechut.modules import TransformerDecoderBaseScriptable + +logger = logging.getLogger(__name__) + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) + + +@dataclass + +class SpeechutConfig(HubertConfig): + use_rel_pos_enc: bool = field( + default=False, + metadata={"help": "whether to use relative positional encoding"}, + ) + scaling_for_att: float = field( + default=1.0, + metadata={"help": "scaling for attention weights to prevent overflow issue (for large model)"}, + ) + + # unit encoder-decoder + text_transformer: TransformerConfig = TransformerConfig() + reset_decoder_embedding_config: bool = field( + default=False, + metadata={"help": "reset the no_scale_embedding/layernorm_embedding to default for the decoder"}, + ) + add_unit_encoder: bool = field( + default=False, + metadata={"help": "add unit encoder"}, + ) + add_decoder: bool = field( + default=True, + metadata={"help": "add decoder"}, + ) + add_text_ctc: bool = field( + default=False, + metadata={"help": "add_text_ctc head"}, + ) + text_ctc_conv_kernel: int = field( + default=2, + metadata={"help": "text_ctc_conv kernel size"}, + ) + mask_u2t: bool = field( + default=True, + metadata={"help": "mask the unit input in unit-to-text task"}, + ) + + # embedding mixing + mix_with_unit: bool = field( + default=True, + metadata={"help": "mix with the unit embeddings"}, + ) + use_pred_unit: bool = field( + default=False, + metadata={"help": "use the embeddings of predicted units"}, + ) + l2_embedding: bool = field( + default=False, + metadata={"help": "compute l2 loss between unit embedding and unit hidden state"}, + ) + + # Finetune related + encoder_dict_size: int = field( + default=-1, + metadata={"help": "text encoder dictionary dimension"}, + ) + + decoder_dict_size: int = field( + default=-1, + metadata={"help": "decoder dictionary dimension"}, + ) + + +@register_model("speechut", dataclass=SpeechutConfig) +class SpeechutModel(BaseFairseqModel): + def __init__( + self, + cfg: SpeechutConfig, + task_cfg: HubertPretrainingConfig, + dictionaries: List[Dictionary], + unit_dictionary: Dictionary = 
None, + text_tgt_dictionary: Dictionary = None, + ) -> None: + super().__init__() + logger.info(f"SpeechutModel Config: {cfg}") + + feature_enc_layers = eval(cfg.conv_feature_layers) # noqa + self.embed = feature_enc_layers[-1][0] + + self.feature_extractor = ConvFeatureExtractionModel( + conv_layers=feature_enc_layers, + dropout=0.0, + mode=cfg.extractor_mode, + conv_bias=cfg.conv_bias, + ) + feature_ds_rate = np.prod([s for _, _, s in feature_enc_layers]) + self.feat2tar_ratio = cfg.label_rate * feature_ds_rate / task_cfg.sample_rate + + self.post_extract_proj = ( + nn.Linear(self.embed, cfg.encoder_embed_dim) + if self.embed != cfg.encoder_embed_dim + else None + ) + + self.mask_prob = cfg.mask_prob + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length = cfg.mask_length + self.no_mask_overlap = cfg.no_mask_overlap + self.mask_min_space = cfg.mask_min_space + + self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + self.logit_temp = cfg.logit_temp + self.skip_masked = cfg.skip_masked + self.skip_nomask = cfg.skip_nomask + + final_dim = cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim + + self.mask_emb = nn.Parameter( + torch.FloatTensor(cfg.encoder_embed_dim).uniform_() + ) + + self.encoder = TransformerEncoder(cfg) + self.layer_norm = LayerNorm(self.embed) + + self.target_glu = None + if cfg.target_glu: + self.target_glu = nn.Sequential( + nn.Linear(final_dim, final_dim * 2), nn.GLU() + ) + + self.final_dim = final_dim + assert len(dictionaries) <= 2, f"Only support <=2 kinds of targets, get {len(dictionaries)} dictionaries" + if len(dictionaries) == 1: + dictionaries = [dictionaries[0], dictionaries[0]] + self.num_classes = [len(d) for d in dictionaries] + + self.final_proj = nn.Linear(cfg.encoder_embed_dim, final_dim) + self.code_encoder_proj = nn.Linear(cfg.text_transformer.encoder.embed_dim, self.num_classes[-1]) + self.final_proj_list = [self.final_proj, self.code_encoder_proj] + + self.label_embs_concat = nn.Parameter(torch.FloatTensor(self.num_classes[0], final_dim)) + self.label_embs_list = [self.label_embs_concat] + for p in self.label_embs_list: + nn.init.uniform_(p) + + ### build unit encoder: + self.mask_u2t = cfg.mask_u2t + self.add_text_ctc = cfg.add_text_ctc + self.text_ctc_conv_kernel = cfg.text_ctc_conv_kernel + self.padding_idx = unit_dictionary.pad() + self.unit_mask_idx = unit_dictionary.index("") + + self.add_unit_encoder = cfg.add_unit_encoder + self.mix_with_unit = cfg.mix_with_unit + self.use_pred_unit = cfg.use_pred_unit + self.l2_embedding = cfg.l2_embedding + if self.add_unit_encoder: + assert len(unit_dictionary) == self.num_classes[0], f"unit_dictionary: {len(unit_dictionary)}, self.num_classes[0]: {self.num_classes[0]}" + ### build unit pre-net, and shared with hubert label_embs if needed (default: False) + self.unit_embed_tokens = self.build_embedding( + unit_dictionary, + cfg.text_transformer.encoder.embed_dim, + ) + if self.final_dim == cfg.text_transformer.encoder.embed_dim: + logger.info("Share label_embs[0] with unit_embed_tokens ...") + 
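+                # Reuse the unit encoder's input embedding table as label_embs[0], so HuBERT targets and unit tokens
+                # share one embedding space (only possible when final_dim equals the unit encoder embed_dim, checked above).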
nn.init.uniform_(self.unit_embed_tokens.weight) + self.label_embs_list[0] = self.unit_embed_tokens.weight + + ### build unit encoder + self.unit_encoder = TransformerEncoderBase( + cfg.text_transformer, + unit_dictionary, + self.unit_embed_tokens, + use_rel_pos_enc=cfg.use_rel_pos_enc, + scaling_for_att=cfg.scaling_for_att, + ) + + ### build text ctc head + if self.add_text_ctc: + conv = nn.Conv1d( + cfg.text_transformer.encoder.embed_dim, cfg.text_transformer.encoder.embed_dim, + self.text_ctc_conv_kernel, + stride=self.text_ctc_conv_kernel // 2, + bias=False, + padding=self.text_ctc_conv_kernel // 2, + ) + nn.init.kaiming_normal_(conv.weight) + self.unit_encoder_ctc_head = nn.Sequential( + Rotate3D(), + conv, + nn.Dropout(p=0.1), + nn.Sequential( + Rotate3D(), + Rotate3D(), + LayerNorm(cfg.text_transformer.encoder.embed_dim), + ), + nn.GELU(), + nn.Linear(cfg.text_transformer.encoder.embed_dim, len(text_tgt_dictionary)), + ) + + ### build unit2text decoder, not available for now + self.add_decoder = cfg.add_decoder + self.text_transformer_cfg = cfg.text_transformer + if self.add_decoder: + # To make sure that the decoder dict size is the same as the fine-tuning tgt_dict size or bpe code dict size + dec_dictionary = self.cutting_dictionary(text_tgt_dictionary, cfg.decoder_dict_size) + decoder_embed_tokens = self.build_embedding( + dec_dictionary, cfg.text_transformer.decoder.embed_dim + ) + if cfg.reset_decoder_embedding_config: + cfg.text_transformer.no_scale_embedding = False + cfg.text_transformer.layernorm_embedding = False + cfg.text_transformer.no_token_positional_embeddings = False + self.decoder = TransformerDecoderBaseScriptable(cfg.text_transformer, dec_dictionary, decoder_embed_tokens, use_rel_pos_enc=cfg.use_rel_pos_enc) + + + def cutting_dictionary(self, dictionary, dict_size): + if dictionary is None or dict_size <= 0: + return dictionary + else: + import copy + cut_dictionary = copy.deepcopy(dictionary) + if dict_size > len(cut_dictionary): + for i in range(dict_size - len(cut_dictionary)): + cut_dictionary.symbols.append(f'_{i}_') + else: + cut_dictionary.symbols = cut_dictionary.symbols[:dict_size] + return cut_dictionary + + def build_embedding(self, dictionary, embed_dim): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + return Embedding(num_embeddings, embed_dim, padding_idx) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: SpeechutConfig, task: HubertPretrainingTask): + """Build a new model instance.""" + unit_dictionary = getattr(task, "text_src_dictionary", None) + text_tgt_dictionary = getattr(task, "text_dictionary", None) + model = SpeechutModel(cfg, task.cfg, task.dictionaries, unit_dictionary, text_tgt_dictionary) + return model + + def apply_mask(self, x, padding_mask, target_list): + B, T, C = x.shape + if self.mask_prob > 0: + mask_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob, + self.mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x[mask_indices] = self.mask_emb + else: + mask_indices = None + + if self.mask_channel_prob > 0: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + 
self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x[mask_channel_indices] = 0 + + return x, mask_indices + + def forward_features(self, source: torch.Tensor) -> torch.Tensor: + if self.feature_grad_mult > 0: + features = self.feature_extractor(source) + if self.feature_grad_mult != 1.0: + features = GradMultiply.apply(features, self.feature_grad_mult) + else: + with torch.no_grad(): + features = self.feature_extractor(source) + return features + + def forward_targets( + self, + features: torch.Tensor, + target_list: List[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Trim features to ensure labels exist and then get aligned labels + feat_tsz = features.size(2) + targ_tsz = min([t.size(1) for t in target_list]) + if self.feat2tar_ratio * feat_tsz > targ_tsz: + feat_tsz = int(targ_tsz / self.feat2tar_ratio) + features = features[..., :feat_tsz] + target_inds = torch.arange(feat_tsz).float() * self.feat2tar_ratio + target_inds += np.random.choice(int(self.feat2tar_ratio)) + target_list = [t[:, target_inds.long()] for t in target_list] + return features, target_list + + def forward_padding_mask( + self, + features: torch.Tensor, + padding_mask: torch.Tensor, + ) -> torch.Tensor: + extra = padding_mask.size(1) % features.size(1) + if extra > 0: + padding_mask = padding_mask[:, :-extra] + padding_mask = padding_mask.view(padding_mask.size(0), features.size(1), -1) + padding_mask = padding_mask.all(-1) + return padding_mask + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + lprobs = self.get_normalized_probs_scriptable(net_output, log_probs, sample) + lprobs.batch_first = True + return lprobs + + def downsample_ctc_padding_mask(self, padding_mask): + """ + padding_mask: (B, T) + """ + stride = self.text_ctc_conv_kernel // 2 + return padding_mask[:, ::stride] + + def compute_pred(self, proj_x, label_embs): + if self.target_glu: + label_embs = self.target_glu(label_embs) + x = F.normalize(proj_x.float(), dim=-1) # (S, D) + label_embs = F.normalize(label_embs.float(), dim=-1) # (C, D) + logits = torch.matmul(x, label_embs.T).type_as(proj_x) # (S, C) + logits /= self.logit_temp + return logits + + def compute_hubert_logits(self, x, target, proj, label_embs, padding_mask, mask_indices): + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + proj_x_m = proj(x[masked_indices]) + logit_m_list = [(self.compute_pred(proj_x_m, label_embs), target[masked_indices])] + else: + logit_m_list = [None] + + if not self.skip_nomask: + nomask_indices = torch.logical_and(~padding_mask, ~mask_indices) + proj_x_u = proj(x[nomask_indices]) + logit_u_list = [(self.compute_pred(proj_x_u, label_embs), target[nomask_indices])] + else: + logit_u_list = [None] + + return logit_m_list, logit_u_list + + def compute_ce_logits(self, x, target, proj, padding_mask, mask_indices): + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + logit_m_list = [(proj(x[masked_indices]), target[masked_indices])] + else: + logit_m_list = [None] + + if not self.skip_nomask: + nomask_indices = torch.logical_and(~padding_mask, ~mask_indices) + logit_u_list = [(proj(x[nomask_indices]), target[nomask_indices])] 
+ else: + logit_u_list = [None] + + return logit_m_list, logit_u_list + + def convert_embeddings(self, + x, + padding_mask, + target=None, + mask_indices=None, + mix_with_unit=False, + use_pred_unit=False, + l2_embedding=False, + remask=False + ): + """ + 1. Mix with units if needed (default: True) + 2. Prepare for unit_encoder inputs + Inputs: + x, (B, T, D) + Return: + src_tokens, (B, T) + soft_embeddings, (B, T, D) + l2_loss, a loss + """ + soft_embeddings = self.final_proj_list[0](x) if x.size(-1) == self.final_dim else x + if padding_mask is None: + padding_mask = soft_embeddings.new_zeros(soft_embeddings.size(0), soft_embeddings.size(1), dtype=torch.long) + if use_pred_unit: + src_tokens = self.compute_pred(self.final_proj_list[0](x), self.label_embs_list[0]).argmax(dim=-1) + src_tokens[padding_mask] = self.padding_idx + elif target is not None: + src_tokens = target + else: + src_tokens = padding_mask.long() + + if l2_embedding | mix_with_unit: + unit_embeddings = self.unit_embed_tokens(src_tokens) # (B, T, D) + + l2_loss = 0 + if l2_embedding: + if mask_indices is not None: + l2_loss = (soft_embeddings - unit_embeddings)[mask_indices].float().pow(2).mean(dim=-1) + scale = unit_embeddings[mask_indices].float().pow(2).sum(dim=-1) + else: + l2_loss = (soft_embeddings - unit_embeddings).float().pow(2).mean(dim=-1) + scale = unit_embeddings.float().pow(2).sum(dim=-1) + l2_loss = (l2_loss / scale).mean() + + if mix_with_unit: + B, T, D = x.shape + selected_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob / 2, + self.mask_length // 2, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + selected_indices = torch.from_numpy(selected_indices).to(x.device) + if mask_indices is not None: + if remask: + remask_indices = torch.logical_and(selected_indices, mask_indices) + soft_embeddings[remask_indices] = self.mask_emb + swap_indices = torch.logical_and(selected_indices, ~mask_indices) + else: + swap_indices = selected_indices + soft_embeddings[swap_indices] = unit_embeddings[swap_indices] + + soft_embeddings = soft_embeddings * (1 - padding_mask.unsqueeze(-1).type_as(x)) + return src_tokens, soft_embeddings, l2_loss + + def forward( + self, + source: torch.Tensor = None, + src_tokens: torch.Tensor = None, + src_lengths: torch.Tensor = None, + prev_output_tokens: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + assert source is not None or src_tokens is not None + if source is not None: + return self.forward_speech( + source=source, + target_list=target_list, + padding_mask=padding_mask, + mask=mask, + features_only=features_only, + output_layer=output_layer, + ) + else: + return self.forward_text( + src_tokens=src_tokens, + src_lengths=src_lengths, + prev_output_tokens=prev_output_tokens, + mask=self.mask_u2t, + features_only=features_only, + output_layer=output_layer, + ) + + def forward_speech( + self, + source: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + """output layer is 1-based""" + features = self.forward_features(source) + if target_list is not None: + features, target_list = 
self.forward_targets(features, target_list) + + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) + features = self.layer_norm(features) + unmasked_features = features.clone() + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = self.dropout_input(features) + unmasked_features = self.dropout_features(unmasked_features) + + if mask: + x, mask_indices = self.apply_mask(features, padding_mask, target_list) + else: + x = features + mask_indices = None + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, _ = self.encoder( + x, + padding_mask=padding_mask, + layer=None if output_layer is None else output_layer - 1, + ) + + if features_only: + return {"x": x, "padding_mask": padding_mask, "features": features} + + logit_m_list, logit_u_list = self.compute_hubert_logits( + x, + target_list[0], + self.final_proj_list[0], + self.label_embs_list[0], + padding_mask, + mask_indices, + ) + + result = { + "logit_m_list": logit_m_list, + "logit_u_list": logit_u_list, + "padding_mask": padding_mask, + "features_pen": features_pen, + } + + if self.add_unit_encoder: + src_tokens, x_emb, l2_loss = self.convert_embeddings( + x, + padding_mask, target_list[0], + mask_indices=mask_indices, + mix_with_unit=self.mix_with_unit, + use_pred_unit=self.use_pred_unit, + l2_embedding=self.l2_embedding, + ) + encoder_out = self.unit_encoder(src_tokens, token_embeddings=x_emb) + + result['encoder_out'] = encoder_out['encoder_out'] # [(T, B, D)] + result['encoder_padding_mask'] = encoder_out['encoder_padding_mask'] # [(B, T)] + if self.l2_embedding: + result['embedding_l2_loss'] = l2_loss + + code_logit_m_list, code_logit_u_list = self.compute_ce_logits( + encoder_out['encoder_out'][0].transpose(0, 1), # -> (B, T, C) + target_list[-1], + self.final_proj_list[1], + padding_mask, + mask_indices, + ) + result['logit_m_list'] += code_logit_m_list + result['logit_u_list'] += code_logit_u_list + return result + + def forward_text( + self, + src_tokens: torch.Tensor = None, + src_lengths: torch.Tensor = None, + prev_output_tokens: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + assert self.add_unit_encoder, f"Can not forward unit-text branch without unit_encoder!" 
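# --- Editor's note: hypothetical caller sketch (shapes, sizes and flags are made up) ----
# forward() above dispatches on its inputs: a raw waveform `source` goes through the
# speech branch (forward_speech), while discrete unit ids `src_tokens` go through this
# unit/text branch.  A rough illustration of the two call patterns, assuming an already
# built `model` and leaving masking off on the speech side:
import torch

def run_both_branches(model):
    wav = torch.randn(2, 16000)                                 # (B, samples), speech branch
    speech_out = model(source=wav, mask=False, features_only=True)

    units = torch.randint(4, 500, (2, 120))                     # (B, T) unit ids, unit/text branch
    unit_out = model(src_tokens=units, features_only=True)
    return speech_out["x"], unit_out["encoder_out"][0]          # (B, T, D) and (T, B, D)
# ----------------------------------------------------------------------------------------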
+ + padding_mask = src_tokens == self.padding_idx + unit_embeddings = self.unit_embed_tokens(src_tokens) + if mask: + unit_embeddings, mask_indices = self.apply_mask(unit_embeddings, padding_mask, [src_tokens]) + + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=unit_embeddings, + return_all_hiddens=output_layer is not None, + ) + + result = {} + result["encoder_out"] = encoder_out["encoder_out"] + result["encoder_states"] = encoder_out["encoder_states"] + result["padding_mask"] = padding_mask + + if self.add_text_ctc: + result["encoder_out_ctc"] = [self.unit_encoder_ctc_head(x) for x in encoder_out['encoder_out']] + result["encoder_padding_mask"] = [ + self.downsample_ctc_padding_mask(padding_mask) for padding_mask in encoder_out['encoder_padding_mask'] + ] + + if features_only: + return result + if self.add_decoder: + assert prev_output_tokens is not None + decoder_out = self.decoder( + prev_output_tokens=prev_output_tokens, encoder_out=encoder_out, + ) + result['decoder_out'] = decoder_out + return result + + def forward_mum(self, src_tokens, target, mask=True): + target_list = [target] + padding_mask = src_tokens.eq(self.unit_encoder.padding_idx) + unit_embeddings = self.unit_embed_tokens(src_tokens) + if mask: + unit_embeddings, mask_indices = self.apply_mask(unit_embeddings, padding_mask, target_list) + else: + ### If already applied mask on src_tokens, then the target_list should contains many padding_idx + mask_indices = target_list[-1] != self.padding_idx + unit_embeddings[mask_indices] = self.mask_emb + + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=unit_embeddings, + ) + code_logit_m_list, code_logit_u_list = self.compute_ce_logits( + encoder_out["encoder_out"][0].transpose(0, 1), + target_list[-1], + self.final_proj_list[1], + padding_mask, + mask_indices, + ) + result = {} + result["logit_m_list"] = code_logit_m_list + result["logit_u_list"] = code_logit_u_list + result["padding_mask"] = padding_mask + return result + + def extract_features( + self, + source: torch.Tensor, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = False, + ret_conv: bool = False, + output_layer: Optional[int] = None, + **kwargs, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Extract encoder features for only speech input""" + res = self.forward( + source, + padding_mask=padding_mask, + mask=mask, + features_only=True, + output_layer=output_layer, + ) + x = res["x"] # B x T x D + padding_mask = res["padding_mask"] + + if self.add_unit_encoder: + src_tokens, x, _ = self.convert_embeddings( + x, + padding_mask, + mix_with_unit=False, + use_pred_unit=False, + ) + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=x, + return_all_hiddens=output_layer is not None + ) + res["x"] = encoder_out['encoder_out'][0].transpose(0, 1) # (B, T, D) + + feature = res["features"] if ret_conv else res["x"] + if output_layer is not None: + feature = encoder_out['encoder_states'] + + return feature, padding_mask + + def get_logits(self, net_output, is_masked=True): + if is_masked: + logits_list = net_output["logit_m_list"] + else: + logits_list = net_output["logit_u_list"] + logits_list = [x[0].float() for x in logits_list if x is not None] + return logits_list + + def get_targets(self, net_output, is_masked=True): + if is_masked: + logits_list = net_output["logit_m_list"] + else: + logits_list = net_output["logit_u_list"] + targets_list = [x[1].long() for x in logits_list if x is not None] + return targets_list + + def get_extra_losses(self, net_output): + 
extra_losses = [] + names = [] + + if "features_pen" in net_output: + extra_losses.append(net_output["features_pen"]) + names.append("features_pen") + + if "embedding_l2_loss" in net_output: + extra_losses.append(net_output["embedding_l2_loss"]) + names.append("embedding_l2_loss") + + return extra_losses, names + + def remove_pretraining_modules(self, step2=False): + self.target_glu = None + + def load_checkpoint(self, checkpoint: str): + if not PathManager.exists(checkpoint): + raise IOError("Model file not found: {}".format(checkpoint)) + state = checkpoint_utils.load_checkpoint_to_cpu(checkpoint) + return state + +class Rotate3D(nn.Module): + """ + (T, B, D) --> (B, D, T) --> (D, T, B) --> (T, B, D) + """ + def __init__(self): + super().__init__() + + def forward(self, x): + return x.permute(1, 2, 0) diff --git a/SpeechT5/Speech2S/speech2s/models/speechut_asr.py b/SpeechT5/Speech2S/speech2s/models/speechut_asr.py new file mode 100644 index 0000000000000000000000000000000000000000..f9ec9d8488b4f7e552804d355de000c80fb35b78 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/models/speechut_asr.py @@ -0,0 +1,165 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import contextlib +import torch +from dataclasses import dataclass, field +from fairseq import utils +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.fairseq_encoder import FairseqEncoder +from fairseq.models.hubert import HubertAsrConfig, HubertEncoder +from fairseq.tasks import FairseqTask + +@dataclass +class SpeechUTASRConfig(HubertAsrConfig): + add_decoder: bool = field( + default=True, + metadata={"help": "add decoder for fine-tune"}, + ) + +@register_model("speechut_asr", dataclass=SpeechUTASRConfig) +class SpeechUTASR(BaseFairseqModel): + """ + A encoder-ctc-decoder model if cfg.add_decoder is True, or a encoder-ctc model + """ + def __init__(self, cfg: SpeechUTASRConfig, encoder: FairseqEncoder): + super().__init__() + self.cfg = cfg + self.encoder = encoder + if not cfg.add_decoder: + self.encoder.w2v_model.decoder = None + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: SpeechUTASRConfig, task: FairseqTask): + """Build a new model instance.""" + encoder = SpeechUTEncoder(cfg, task) + return cls(cfg, encoder) + + def forward(self, source, padding_mask, prev_output_tokens, **kwargs): + encoder_out = self.encoder(source, padding_mask, **kwargs) + + x = self.encoder.final_dropout(encoder_out['encoder_out'][0]) # (T, B, C) + if self.encoder.proj: + x = self.encoder.proj(x) + if self.encoder.conv_ctc_proj: + padding_mask = self.encoder.w2v_model.downsample_ctc_padding_mask(encoder_out["encoder_padding_mask"][0]) + else: + padding_mask = encoder_out["encoder_padding_mask"] + + decoder_out = self.decoder( + prev_output_tokens, encoder_out=encoder_out, **kwargs + ) if self.cfg.add_decoder else None + + return { + "encoder_out_ctc": x, # (T, B, C), 
for CTC loss + "padding_mask": padding_mask, # (B, T), for CTC loss + "decoder_out": decoder_out, # for ED loss + } + + def forward_decoder(self, prev_output_tokens, **kwargs): + return self.decoder(prev_output_tokens, **kwargs) + + def get_logits(self, net_output): + """For CTC decoding""" + logits = net_output["encoder_out"] + padding = net_output["encoder_padding_mask"] + if padding is not None and padding.any(): + padding = padding.T + logits[padding][..., 0] = 0 + logits[padding][..., 1:] = float("-inf") + + return logits + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """For 1) computing CTC loss, 2) decoder decoding.""" + + if "encoder_out_ctc" in net_output: + logits = net_output["encoder_out_ctc"] + else: + return self.decoder.get_normalized_probs(net_output, log_probs, sample) + + if isinstance(logits, list): + logits = logits[0] + + if log_probs: + return utils.log_softmax(logits.float(), dim=-1) + else: + return utils.softmax(logits.float(), dim=-1) + + @property + def decoder(self): + return self.encoder.w2v_model.decoder + + +class SpeechUTEncoder(HubertEncoder): + """ + Modified from fairseq.models.hubert.hubert_asr.HubertEncoder + 1. make it compatible with encoder-decoder model + """ + def __init__(self, cfg: HubertAsrConfig, task): + super().__init__(cfg, task) + + if (task.target_dictionary is not None) and ( + hasattr(self.w2v_model, "unit_encoder_ctc_head") + ): + self.proj = self.w2v_model.unit_encoder_ctc_head + self.conv_ctc_proj = True + else: + self.conv_ctc_proj = False + + def forward(self, source, padding_mask, tbc=True, **kwargs): + w2v_args = { + "source": source, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + } + ft = self.freeze_finetune_updates <= self.num_updates + with torch.no_grad() if not ft else contextlib.ExitStack(): + x, padding_mask = self.w2v_model.extract_features(**w2v_args) + if tbc: + # B x T x C -> T x B x C + x = x.transpose(0, 1) + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [padding_mask], # B x T + } + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. + + Forward the encoder out. 
+ """ + x, padding_mask = self.w2v_model.extract_features(**net_input, mask=False) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + encoder_out = { + "encoder_out" : [x], + "encoder_padding_mask" : [padding_mask], + } + if self.proj: + x = self.proj(x) + encoder_out["encoder_out_ctc"] = x + + return encoder_out + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + encoder_out["encoder_out"] = [ + x.index_select(1, new_order) for x in encoder_out["encoder_out"] + ] + if encoder_out["encoder_padding_mask"] is not None: + encoder_out["encoder_padding_mask"] = [ + x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"] + ] + return encoder_out diff --git a/SpeechT5/Speech2S/speech2s/models/speechut_st.py b/SpeechT5/Speech2S/speech2s/models/speechut_st.py new file mode 100644 index 0000000000000000000000000000000000000000..6faaccfc89748a2692bd1eaec200588449d10423 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/models/speechut_st.py @@ -0,0 +1,221 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import contextlib +import torch +import torch.nn as nn +from argparse import Namespace +from dataclasses import dataclass +from typing import Any +from fairseq import checkpoint_utils, tasks +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.fairseq_encoder import FairseqEncoder +from fairseq.tasks import FairseqTask +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.data.data_utils import lengths_to_padding_mask + +from fairseq.models.hubert import HubertAsrConfig + +logger = logging.getLogger(__name__) + +@dataclass +class SpeechUTS2TConfig(HubertAsrConfig): + ### the following config is only for the compatibility to fairseq speech_to_text task + input_feat_per_channel: Any = None + input_channels: Any = None + speaker_to_id: Any = None + +@register_model("speechut_st_legacy", dataclass=SpeechUTS2TConfig) +class SpeechUTS2T(BaseFairseqModel): + """An encoder-decoder model.""" + def __init__(self, cfg: SpeechUTS2TConfig, encoder: FairseqEncoder): + super().__init__() + self.cfg = cfg + self.encoder = encoder + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: SpeechUTS2TConfig, task: FairseqTask): + """Build a new model instance.""" + encoder = SpeechUTEncoder(cfg, task) + return cls(cfg, encoder) + + def forward(self, src_tokens, src_lengths, prev_output_tokens, **kwargs): + encoder_out = self.encoder(src_tokens, src_lengths, **kwargs) + decoder_out = self.encoder.w2v_model.decoder( + prev_output_tokens, encoder_out=encoder_out, **kwargs + ) + return decoder_out + + def forward_decoder(self, prev_output_tokens, **kwargs): + return self.encoder.w2v_model.decoder(prev_output_tokens, **kwargs) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """For decoder decoding.""" + 
return self.encoder.w2v_model.decoder.get_normalized_probs(net_output, log_probs, sample) + + @property + def decoder(self): + return self.encoder.w2v_model.decoder + + +class SpeechUTEncoder(FairseqEncoder): + """ + Modified from fairseq.models.hubert.hubert_asr.HubertEncoder + 1. make it compatible with fairseq speech_to_text task + 2. make it compatible with encoder-decoder model + """ + def __init__(self, cfg: SpeechUTS2TConfig, task): + self.apply_mask = cfg.apply_mask + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + } + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + assert task.data_cfg.standardize_audio() == w2v_args.task.normalize, ( + "Fine-tuning works best when data normalization is the same. " + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + pretrain_task = tasks.setup_task(w2v_args.task, load_local_states=False) + assert state is not None and "task_state" in state, f"the stored dictionaries not found in checkpoint!" + # This will load the stored "dictionaries" object + pretrain_task.load_state_dict(state["task_state"]) + + model = pretrain_task.build_model(w2v_args.model, from_checkpoint=True) + if state is not None and not cfg.no_pretrained_weights: + try: + model.load_state_dict(state["model"], strict=True) + except Exception as e: + logger.warn(e) + model.load_state_dict(state["model"], strict=False) + + model.remove_pretraining_modules() + + super().__init__(pretrain_task.source_dictionary) + + d = w2v_args.model.encoder_embed_dim + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.num_updates = 0 + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, src_tokens=None, src_lengths=None, **kwargs): + + w2v_args = { + "source": src_tokens, + "padding_mask": lengths_to_padding_mask(src_lengths), + "mask": self.apply_mask and self.training, + } + + ft = self.freeze_finetune_updates <= self.num_updates + + with torch.no_grad() if not ft else contextlib.ExitStack(): + x, padding_mask = self.w2v_model.extract_features(**w2v_args) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [padding_mask], # B x T + "padding_mask": [padding_mask], + } + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. 
+ + Forward the encoder out. + """ + _net_input = { + "source": net_input["src_tokens"], + "padding_mask": lengths_to_padding_mask(net_input["src_lengths"]), + "mask": False, + } + + x, padding_mask = self.w2v_model.extract_features(**_net_input) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + encoder_out = { + "encoder_out" : [x], + "encoder_padding_mask" : [padding_mask], + } + return encoder_out + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + encoder_out["encoder_out"] = [ + x.index_select(1, new_order) for x in encoder_out["encoder_out"] + ] + if encoder_out["encoder_padding_mask"] is not None: + encoder_out["encoder_padding_mask"] = [ + x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"] + ] + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git a/SpeechT5/Speech2S/speech2s/models/t5_transformer_lm.py b/SpeechT5/Speech2S/speech2s/models/t5_transformer_lm.py new file mode 100644 index 0000000000000000000000000000000000000000..3d16a2df00b692114f8d84d254cf486d09e1137b --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/models/t5_transformer_lm.py @@ -0,0 +1,25 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +from fairseq.models import ( + register_model_architecture, +) +from fairseq.models.transformer_lm import base_lm_architecture + + +@register_model_architecture(model_name="transformer_lm", arch_name="transformer_lm_t5") +def transformer_lm_t5(args): + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1280) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 6144) + args.decoder_layers = getattr(args, "decoder_layers", 20) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_fn = getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) diff --git a/SpeechT5/Speech2S/speech2s/modules/__init__.py b/SpeechT5/Speech2S/speech2s/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dad97814e515d8e68d68e4e031d4f9c9055f3864 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/modules/__init__.py @@ -0,0 +1,27 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# 
-------------------------------------------------------- + +from .learned_positional_embedding import LearnedPositionalEmbedding +from .multihead_attention import MultiheadAttention +from .relative_pos_enc import RelativePositionalEncoding +from .transformer_layer import TransformerEncoderLayerBase, TransformerDecoderLayerBase +from .w2v_encoder import TransformerEncoder, TransformerSentenceEncoderLayer +from .transformer_encoder import TransformerEncoderBase +from .transformer_decoder import TransformerDecoderScriptable, TransformerDecoderBaseScriptable + +__all__ = [ + "MultiheadAttention", + "RelativePositionalEncoding", + "LearnedPositionalEmbedding", + "TransformerEncoderLayerBase", + "TransformerDecoderLayerBase", + "TransformerEncoder", + "TransformerSentenceEncoderLayer", + "TransformerEncoderBase", + "TransformerDecoderScriptable", + "TransformerDecoderBaseScriptable", +] diff --git a/SpeechT5/Speech2S/speech2s/modules/ctc_prefix_score.py b/SpeechT5/Speech2S/speech2s/modules/ctc_prefix_score.py new file mode 100644 index 0000000000000000000000000000000000000000..b42cbd819abf7bdd718bef3db3f553c8360ac384 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/modules/ctc_prefix_score.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Mitsubishi Electric Research Labs (Takaaki Hori) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import numpy as np +import six + + +class CTCPrefixScore(object): + """Compute CTC label sequence scores + which is based on Algorithm 2 in WATANABE et al. + "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION," + but extended to efficiently compute the probablities of multiple labels + simultaneously + """ + + def __init__(self, x, blank, eos, xp): + self.xp = xp + self.logzero = -10000000000.0 + self.blank = blank + self.eos = eos + self.input_length = len(x) + self.x = x + + def initial_state(self): + """Obtain an initial CTC state + :return: CTC state + """ + # initial CTC state is made of a frame x 2 tensor that corresponds to + # r_t^n() and r_t^b(), where 0 and 1 of axis=1 represent + # superscripts n and b (non-blank and blank), respectively. + r = self.xp.full((self.input_length, 2), self.logzero, dtype=np.float32) + r[0, 1] = self.x[0, self.blank] + for i in six.moves.range(1, self.input_length): + r[i, 1] = r[i - 1, 1] + self.x[i, self.blank] + return r + + def __call__(self, y, cs, r_prev): + """Compute CTC prefix scores for next labels + :param y : prefix label sequence + :param cs : array of next labels + :param r_prev: previous CTC state + :return ctc_scores, ctc_states + """ + # initialize CTC states + output_length = len(y) - 1 # ignore sos + # new CTC states are prepared as a frame x (n or b) x n_labels tensor + # that corresponds to r_t^n(h) and r_t^b(h). 
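# --- Editor's note: usage sketch for CTCPrefixScore (all numbers below are made up) -----
# The scorer is built from a (T, V) matrix of per-frame log-probabilities and is then
# queried with a prefix `y` plus an array of candidate next labels `cs`, returning one
# log prefix score per candidate.  A minimal numpy-only demo, assuming the class defined
# in this file, blank id 0 and eos id 2:
import numpy as np

def demo_ctc_prefix_score():
    T, V = 8, 5
    rng = np.random.default_rng(0)
    logp = np.log(rng.dirichlet(np.ones(V), size=T))   # (T, V) log-probs, rows sum to 1 in prob space
    scorer = CTCPrefixScore(logp, blank=0, eos=2, xp=np)
    r_prev = scorer.initial_state()                    # (T, 2) forward variables for the empty prefix
    y = np.array([1])                                  # current prefix (first entry acts as <sos>)
    cs = np.array([1, 2, 3, 4])                        # candidate next labels
    log_psi, states = scorer(y, cs, r_prev)
    return log_psi                                     # shape (len(cs),)
# ----------------------------------------------------------------------------------------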
+ r = self.xp.ndarray((self.input_length, 2, len(cs)), dtype=np.float32) + xs = self.x[:, cs] + if output_length == 0: + r[0, 0] = xs[0] + r[0, 1] = self.logzero + else: + r[output_length - 1] = self.logzero + + # prepare forward probabilities for the last label + r_sum = self.xp.logaddexp( + r_prev[:, 0], r_prev[:, 1] + ) # log(r_t^n(g) + r_t^b(g)) + last = y[-1] + if output_length > 0 and last in cs: + log_phi = self.xp.ndarray((self.input_length, len(cs)), dtype=np.float32) + for i in six.moves.range(len(cs)): + log_phi[:, i] = r_sum if cs[i] != last else r_prev[:, 1] + else: + log_phi = r_sum + + # compute forward probabilities log(r_t^n(h)), log(r_t^b(h)), + # and log prefix probabilities log(psi) + start = max(output_length, 1) + log_psi = r[start - 1, 0] + for t in six.moves.range(start, self.input_length): + r[t, 0] = self.xp.logaddexp(r[t - 1, 0], log_phi[t - 1]) + xs[t] + r[t, 1] = ( + self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) + self.x[t, self.blank] + ) + log_psi = self.xp.logaddexp(log_psi, log_phi[t - 1] + xs[t]) + + # get P(...eos|X) that ends with the prefix itself + eos_pos = self.xp.where(cs == self.eos)[0] + if len(eos_pos) > 0: + log_psi[eos_pos] = r_sum[-1] # log(r_T^n(g) + r_T^b(g)) + + # exclude blank probs + blank_pos = self.xp.where(cs == self.blank)[0] + if len(blank_pos) > 0: + log_psi[blank_pos] = self.logzero + + # return the log prefix probability and CTC states, where the label axis + # of the CTC states is moved to the first axis to slice it easily + return log_psi, self.xp.rollaxis(r, 2) diff --git a/SpeechT5/Speech2S/speech2s/modules/learned_positional_embedding.py b/SpeechT5/Speech2S/speech2s/modules/learned_positional_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..20c8558e20b2172a8c607e2f5c32aa146ff2b9cf --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/modules/learned_positional_embedding.py @@ -0,0 +1,69 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/modules/learned_positional_embedding.py + 1. Add clamping if the input length exceeds the max-source-tokens +""" + +from typing import Dict, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from torch import Tensor + + +class LearnedPositionalEmbedding(nn.Embedding): + """ + This module learns positional embeddings up to a fixed maximum size. + Padding ids are ignored by either offsetting based on padding_idx + or by setting padding_idx to None and ensuring that the appropriate + position ids are passed to the forward function. 
+ """ + + def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: int): + super().__init__(num_embeddings, embedding_dim, padding_idx) + self.onnx_trace = False + if self.padding_idx is not None: + self.max_positions = self.num_embeddings - self.padding_idx - 1 + else: + self.max_positions = self.num_embeddings + + def forward( + self, + input: Tensor, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + positions: Optional[Tensor] = None, + ): + """Input is expected to be of size [bsz x seqlen].""" + assert (positions is None) or ( + self.padding_idx is None + ), "If positions is pre-computed then padding_idx should not be set." + + if positions is None: + if incremental_state is not None: + # positions is the same for every token when decoding a single step + # Without the int() cast, it doesn't work in some cases when exporting to ONNX + positions = torch.zeros( + (1, 1), device=input.device, dtype=input.dtype + ).fill_(int(self.padding_idx + input.size(1))) + else: + positions = utils.make_positions( + input, self.padding_idx, onnx_trace=self.onnx_trace + ) + positions = torch.clamp(positions, max=self.padding_idx + self.max_positions) + return F.embedding( + positions, + self.weight, + self.padding_idx, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.sparse, + ) diff --git a/SpeechT5/Speech2S/speech2s/modules/multihead_attention.py b/SpeechT5/Speech2S/speech2s/modules/multihead_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..89f46ab628ebe7faa1a3db2fd4f31a7269bb006a --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/modules/multihead_attention.py @@ -0,0 +1,346 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +from typing import Dict, Optional, Tuple + +import torch +import torch.nn.functional as F +from fairseq import utils +from torch import Tensor + +from fairseq.modules import MultiheadAttention as FairseqMultiheadAttention + + +class MultiheadAttention(FairseqMultiheadAttention): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. + """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + q_noise=0.0, + qn_block_size=8, + scaling_for_att=1.0 + ): + super().__init__( + embed_dim, + num_heads, + kdim, + vdim, + dropout, + bias, + add_bias_kv, + add_zero_attn, + self_attention, + encoder_decoder_attention, + q_noise, + qn_block_size, + ) + self.scaling_for_att = scaling_for_att + + def forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + need_weights: bool = True, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + before_softmax: bool = False, + need_head_weights: bool = False, + position_bias: Optional[Tensor] = None, + ) -> Tuple[Tensor, Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. 
+ need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). + before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. + """ + if need_head_weights: + need_weights = True + + is_tpu = query.device.type == "xla" + + tgt_len, bsz, embed_dim = query.size() + src_len = tgt_len + assert embed_dim == self.embed_dim, f"query dim {embed_dim} != {self.embed_dim}" + assert list(query.size()) == [tgt_len, bsz, embed_dim] + if key is not None: + src_len, key_bsz, _ = key.size() + if not torch.jit.is_scripting(): + assert key_bsz == bsz + assert value is not None + assert src_len, bsz == value.shape[:2] + + if ( + not self.onnx_trace + and not is_tpu # don't use PyTorch version on TPUs + and incremental_state is None + and not static_kv + # A workaround for quantization to work. Otherwise JIT compilation + # treats bias in linear module as method. + and not torch.jit.is_scripting() + and position_bias is None + ): + assert key is not None and value is not None + return F.multi_head_attention_forward( + query, + key, + value, + self.embed_dim, + self.num_heads, + torch.empty([0]), + torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), + self.bias_k, + self.bias_v, + self.add_zero_attn, + self.dropout_module.p, + self.out_proj.weight, + self.out_proj.bias, + self.training or self.dropout_module.apply_during_inference, + key_padding_mask, + need_weights, + attn_mask, + use_separate_proj_weight=True, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + ) + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + q *= (1 / self.scaling_for_att) + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + key_padding_mask.new_zeros(key_padding_mask.size(0), 1), + ], + dim=1, + ) + + q = ( + q.contiguous() + .view(tgt_len, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if k is not None: + k = ( + k.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if v is not None: + v = ( + 
v.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + assert k is not None + k = torch.cat([prev_key, k], dim=1) + src_len = k.size(1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + + saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + assert k.size(1) == src_len + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. + if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + assert v is not None + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + torch.zeros(key_padding_mask.size(0), 1).type_as( + key_padding_mask + ), + ], + dim=1, + ) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + if position_bias is not None: ## first order + ## position_bias: [241, 241, 64] + #print ("attn_weights: ", attn_weights.size()) # [492, 241, 241] + reshape_q = q.contiguous().view(bsz * self.num_heads, -1, self.head_dim).transpose(0,1) #[241, 492, 64] + #print ("reshape_q: ", reshape_q.size()) + B = torch.matmul(reshape_q, position_bias.transpose(-2, -1)) + #print ("B: ", B.size()) ## [241, 492, 241] + #B = B.transpose(0, 1).view(bsz, self.num_heads, position_bias.size(0), position_bias.size(1)) + B = B.transpose(0, 1).view(bsz*self.num_heads, position_bias.size(0), position_bias.size(1)) + #print ("B 2: ", B.size()) + attn_weights += B + + attn_weights *= self.scaling_for_att + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if 
key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if not is_tpu: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if self.scaling_for_att > 1.0: + attn_weights = attn_weights - attn_weights.detach().max(dim=-1, keepdim=True)[0] + + if before_softmax: + return attn_weights, v + + attn_weights_float = utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace + ) + attn_weights = attn_weights_float.type_as(attn_weights) + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + if self.onnx_trace and attn.size(1) == 1: + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, embed_dim) + else: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn = self.out_proj(attn) + attn_weights: Optional[Tensor] = None + if need_weights: + attn_weights = attn_weights_float.view( + bsz, self.num_heads, tgt_len, src_len + ).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + + return attn, attn_weights diff --git a/SpeechT5/Speech2S/speech2s/modules/relative_pos_enc.py b/SpeechT5/Speech2S/speech2s/modules/relative_pos_enc.py new file mode 100644 index 0000000000000000000000000000000000000000..7021fc0941fef310ca5571c101b8a8e18ffc1db6 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/modules/relative_pos_enc.py @@ -0,0 +1,33 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import torch + +class RelativePositionalEncoding(torch.nn.Module): + def __init__(self, d_model, maxlen=1000, embed_v=False): + super(RelativePositionalEncoding, self).__init__() + + self.d_model = d_model + self.maxlen = maxlen + self.pe_k = torch.nn.Embedding(2*maxlen, d_model) + if embed_v: + self.pe_v = torch.nn.Embedding(2*maxlen, d_model) + self.embed_v = embed_v + + + def forward(self, pos_seq, incremental_state=None): + pos_seq[pos_seq < -self.maxlen] = -self.maxlen + pos_seq[pos_seq >= self.maxlen] = self.maxlen - 1 + pos_seq = pos_seq + self.maxlen + + if incremental_state is not None: + pos_seq = pos_seq[-1:] + + if self.embed_v: + return self.pe_k(pos_seq), self.pe_v(pos_seq) + else: + return self.pe_k(pos_seq), None diff --git a/SpeechT5/Speech2S/speech2s/modules/transformer_decoder.py b/SpeechT5/Speech2S/speech2s/modules/transformer_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..84417b44b2672e49cf92bad8355d2dae48661b55 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/modules/transformer_decoder.py @@ -0,0 +1,543 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code 
bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/transformer/transformer_decoder.py +""" + +import math +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqIncrementalDecoder +from fairseq.models.transformer import TransformerConfig +from fairseq.modules import ( + AdaptiveSoftmax, + BaseLayer, + FairseqDropout, + LayerDropModuleList, + LayerNorm, + PositionalEmbedding, + SinusoidalPositionalEmbedding, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor + +from speechut.modules import transformer_layer +from speechut.modules import RelativePositionalEncoding + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerDecoderBase": + return "TransformerDecoder" + else: + return module_name + + +class TransformerDecoderBase(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *cfg.decoder.layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, + cfg, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + use_rel_pos_enc=False, + ): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + self._future_mask = torch.empty(0) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.decoder_layerdrop = cfg.decoder.layerdrop + self.share_input_output_embed = cfg.share_decoder_input_output_embed + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = cfg.decoder.embed_dim + self.embed_dim = embed_dim + self.output_embed_dim = cfg.decoder.output_dim + + self.padding_idx = embed_tokens.padding_idx + self.max_target_positions = cfg.max_target_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + self.embed_positions = ( + PositionalEmbedding( + self.max_target_positions, + embed_dim, + self.padding_idx, + learned=cfg.decoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + self.cross_self_attention = cfg.cross_self_attention + + if self.decoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.use_rel_pos_enc = use_rel_pos_enc + 
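# --- Editor's note: standalone sketch of the relative-position index construction -------
# When use_rel_pos_enc is enabled, the decoder builds a (T, T) matrix of pairwise offsets
# i - j and feeds it to RelativePositionalEncoding (defined earlier in this diff), which
# clamps the offsets to [-maxlen, maxlen - 1] and shifts them into a 2*maxlen embedding
# table.  The decoder below instantiates it with maxlen=24; the sequence length here is
# just an example value:
import torch

def relative_position_indices(seq_len: int = 6, maxlen: int = 24) -> torch.Tensor:
    pos_seq = torch.arange(seq_len)
    rel = pos_seq[:, None] - pos_seq[None, :]        # (T, T): row i, col j -> offset i - j
    rel = rel.clamp(min=-maxlen, max=maxlen - 1)     # same clamping as RelativePositionalEncoding
    return rel + maxlen                              # non-negative ids into the 2*maxlen table
# ----------------------------------------------------------------------------------------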
self.layers.extend( + [ + self.build_decoder_layer(cfg, no_encoder_attn) + for _ in range(cfg.decoder.layers) + ] + ) + self.num_layers = len(self.layers) + + if cfg.decoder.normalize_before and not cfg.no_decoder_final_norm: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + + self.project_out_dim = ( + Linear(embed_dim, self.output_embed_dim, bias=False) + if embed_dim != self.output_embed_dim and not cfg.tie_adaptive_weights + else None + ) + + self.adaptive_softmax = None + self.output_projection = output_projection + if self.output_projection is None: + self.build_output_projection(cfg, dictionary, embed_tokens) + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(embed_dim // cfg.decoder.attention_heads, 24) + + def build_output_projection(self, cfg, dictionary, embed_tokens): + if cfg.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + utils.eval_str_list(cfg.adaptive_softmax_cutoff, type=int), + dropout=cfg.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if cfg.tie_adaptive_weights else None, + factor=cfg.adaptive_softmax_factor, + tie_proj=cfg.tie_adaptive_proj, + ) + elif self.share_input_output_embed: + self.output_projection = nn.Linear( + self.embed_tokens.weight.shape[1], + self.embed_tokens.weight.shape[0], + bias=False, + ) + self.output_projection.weight = self.embed_tokens.weight + else: + self.output_projection = nn.Linear( + self.output_embed_dim, len(dictionary), bias=False + ) + nn.init.normal_( + self.output_projection.weight, mean=0, std=self.output_embed_dim ** -0.5 + ) + num_base_layers = cfg.base_layers + for i in range(num_base_layers): + self.layers.insert( + ((i + 1) * cfg.decoder.layers) // (num_base_layers + 1), + BaseLayer(cfg), + ) + + def build_decoder_layer(self, cfg, no_encoder_attn=False): + layer = transformer_layer.TransformerDecoderLayerBase(cfg, no_encoder_attn, has_relative_attention_bias=self.use_rel_pos_enc) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention, should be of size T x B x C + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). 
+ + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + ) + + if not features_only: + x = self.output_layer(x) + return x, extra + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + return self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + + """ + A scriptable subclass of this class has an extract_features method and calls + super().extract_features, but super() is not supported in torchscript. A copy of + this function is made to be used in the subclass instead. + """ + + def extract_features_scriptable( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + """ + Similar to *forward* but only return features. + + Includes several features from "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + alignment_layer (int, optional): return mean alignment over + heads at this layer (default: last layer). + alignment_heads (int, optional): only average alignment over + this many heads (default: all heads). 
+ + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + bs, slen = prev_output_tokens.size() + if alignment_layer is None: + alignment_layer = self.num_layers - 1 + + enc: Optional[Tensor] = None + padding_mask: Optional[Tensor] = None + if encoder_out is not None and len(encoder_out["encoder_out"]) > 0: + enc = encoder_out["encoder_out"][0] + assert ( + enc.size()[1] == bs + ), f"Expected enc.shape == (t, {bs}, c) got {enc.shape}" + if encoder_out is not None and len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + + # embed positions + positions = None + if self.embed_positions is not None: + positions = self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.quant_noise is not None: + x = self.quant_noise(x) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + pos_seq = torch.arange(0, slen).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, _ = self.pos_emb(pos_seq, incremental_state) + else: + pos_k = None + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any(): + self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx) + + # decoder layers + attn: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, _ = layer( + x, + enc, + padding_mask, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer)), + need_head_weights=bool((idx == alignment_layer)), + pos_bias=pos_k, + ) + inner_states.append(x) + if layer_attn is not None and idx == alignment_layer: + attn = layer_attn.float().to(x) + + if attn is not None: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": [attn], "inner_states": inner_states} + + def output_layer(self, features): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + return self.output_projection(features) + else: + return features + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embed_positions is None: + return self.max_target_positions + return min(self.max_target_positions, self.embed_positions.max_positions) + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround. 
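+        # The cached mask is upper-triangular with -inf strictly above the
+        # diagonal, e.g. for dim=3:
+        #   [[0., -inf, -inf],
+        #    [0.,   0., -inf],
+        #    [0.,   0.,   0.]]
+        # so position i can only attend to positions <= i.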
+ if ( + self._future_mask.size(0) == 0 + or (not self._future_mask.device == tensor.device) + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(torch.zeros([dim, dim])), 1 + ) + self._future_mask = self._future_mask.to(tensor) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + + if f"{name}.output_projection.weight" not in state_dict: + if self.share_input_output_embed: + embed_out_key = f"{name}.embed_tokens.weight" + else: + embed_out_key = f"{name}.embed_out" + if embed_out_key in state_dict: + state_dict[f"{name}.output_projection.weight"] = state_dict[ + embed_out_key + ] + if not self.share_input_output_embed: + del state_dict[embed_out_key] + + for i in range(self.num_layers): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m + +class TransformerDecoderBaseScriptable(TransformerDecoderBase): + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + return x, None + + +class TransformerDecoder(TransformerDecoderBase): + def __init__( + self, + args, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + ): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + no_encoder_attn=no_encoder_attn, + output_projection=output_projection, + use_rel_pos_enc=getattr(args, "use_rel_pos_enc", False), + ) + + def build_output_projection(self, args, dictionary, embed_tokens): + super().build_output_projection( + TransformerConfig.from_namespace(args), dictionary, embed_tokens + ) + + def build_decoder_layer(self, args, no_encoder_attn=False): + return super().build_decoder_layer( + TransformerConfig.from_namespace(args), no_encoder_attn=no_encoder_attn + ) + +class TransformerDecoderScriptable(TransformerDecoder): + def extract_features( + self, + prev_output_tokens, + 
encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + return x, None diff --git a/SpeechT5/Speech2S/speech2s/modules/transformer_encoder.py b/SpeechT5/Speech2S/speech2s/modules/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..f94e1fed8a005ec59d1e422157e08d88ff95bfda --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/modules/transformer_encoder.py @@ -0,0 +1,401 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import math +from typing import Dict, List, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqEncoder +from fairseq.modules import ( + FairseqDropout, + LayerDropModuleList, + LayerNorm, + SinusoidalPositionalEmbedding, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor +from fairseq.models.transformer import ( + TransformerConfig, +) + + +from speechut.modules import transformer_layer, LearnedPositionalEmbedding +from speechut.modules import RelativePositionalEncoding + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerEncoderBase": + return "TransformerEncoder" + else: + return module_name + + +class TransformerEncoderBase(FairseqEncoder): + """ + Transformer encoder consisting of *cfg.encoder.layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. 
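+
+    This version additionally supports a relative positional encoding
+    (*use_rel_pos_enc*) and an attention scaling factor (*scaling_for_att*),
+    both of which are forwarded to every encoder layer.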
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, cfg, dictionary, embed_tokens, use_rel_pos_enc=False, scaling_for_att=1.0): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.encoder_layerdrop = cfg.encoder.layerdrop + + embed_dim = embed_tokens.embedding_dim + self.padding_idx = embed_tokens.padding_idx + self.max_source_positions = cfg.max_source_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + self.embed_positions = ( + PositionalEmbedding( + cfg.max_source_positions, + embed_dim, + self.padding_idx, + learned=cfg.encoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + if self.encoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.encoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.use_rel_pos_enc = use_rel_pos_enc + self.scaling_for_att = scaling_for_att + self.layers.extend( + [self.build_encoder_layer(cfg) for i in range(cfg.encoder.layers)] + ) + self.num_layers = len(self.layers) + + if cfg.encoder.normalize_before: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(embed_dim // cfg.encoder.attention_heads, 160) + + def build_encoder_layer(self, cfg): + layer = transformer_layer.TransformerEncoderLayerBase(cfg, has_relative_attention_bias=self.use_rel_pos_enc, scaling_for_att=self.scaling_for_att) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward_embedding( + self, src_tokens, token_embedding: Optional[torch.Tensor] = None + ): + # embed tokens and positions + if token_embedding is None: + token_embedding = self.embed_tokens(src_tokens) + x = embed = self.embed_scale * token_embedding + if self.embed_positions is not None: + x = embed + self.embed_positions(src_tokens) + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + x = self.dropout_module(x) + if self.quant_noise is not None: + x = self.quant_noise(x) + return x, embed + + def forward( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + uniformity_layers: Optional[List[int]] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source 
sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. + """ + return self.forward_scriptable( + src_tokens, src_lengths, return_all_hiddens, token_embeddings, uniformity_layers + ) + + # TorchScript doesn't support super() method so that the scriptable Subclass + # can't access the base class model in Torchscript. + # Current workaround is to add a helper function with different name and + # call the helper function from scriptable Subclass. + def forward_scriptable( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + uniformity_layers: Optional[List[int]] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. 
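+
+        Example (illustrative sketch; assumes a constructed encoder ``enc``
+        and a padded LongTensor ``src_tokens`` of shape ``(batch, src_len)``)::
+
+            out = enc.forward_scriptable(src_tokens, return_all_hiddens=True)
+            last_hidden = out["encoder_out"][0]        # T x B x C
+            pad_mask = out["encoder_padding_mask"][0]  # B x T
+            states = out["encoder_states"]             # list of T x B x C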
+ """ + # compute padding mask + encoder_padding_mask = src_tokens.eq(self.padding_idx) + has_pads = src_tokens.device.type == "xla" or encoder_padding_mask.any() + + x, encoder_embedding = self.forward_embedding(src_tokens, token_embeddings) + + # account for padding while computing the representation + if has_pads: + x = x * (1 - encoder_padding_mask.unsqueeze(-1).type_as(x)) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + encoder_states = [] + uniformity_hiddens = [] + + if return_all_hiddens: + encoder_states.append(x) + + if uniformity_layers is not None and 0 in uniformity_layers: + x = F.normalize(x.float(), dim=-1).type_as(x) + uniformity_hiddens.append(x) + + # encoder layers + for i, layer in enumerate(self.layers): + x = layer( + x, encoder_padding_mask=encoder_padding_mask if has_pads else None, + pos_bias=pos_k, + ) + if uniformity_layers is not None and i+1 in uniformity_layers: + x = F.normalize(x.float(), dim=-1).type_as(x) + uniformity_hiddens.append(x) + if return_all_hiddens: + assert encoder_states is not None + encoder_states.append(x) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # The Pytorch Mobile lite interpreter does not supports returning NamedTuple in + # `forward` so we use a dictionary instead. + # TorchScript does not support mixed values so the values are all lists. + # The empty list is equivalent to None. + src_lengths = ( + src_tokens.ne(self.padding_idx) + .sum(dim=1, dtype=torch.int32) + .reshape(-1, 1) + .contiguous() + ) + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [encoder_padding_mask], # B x T + "encoder_embedding": [encoder_embedding], # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "uniformity_hiddens": uniformity_hiddens, # List[T x B x C] + "src_tokens": [], + "src_lengths": [src_lengths], + } + + @torch.jit.export + def reorder_encoder_out(self, encoder_out: Dict[str, List[Tensor]], new_order): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if len(encoder_out["encoder_out"]) == 0: + new_encoder_out = [] + else: + new_encoder_out = [encoder_out["encoder_out"][0].index_select(1, new_order)] + if len(encoder_out["encoder_padding_mask"]) == 0: + new_encoder_padding_mask = [] + else: + new_encoder_padding_mask = [ + encoder_out["encoder_padding_mask"][0].index_select(0, new_order) + ] + if len(encoder_out["encoder_embedding"]) == 0: + new_encoder_embedding = [] + else: + new_encoder_embedding = [ + encoder_out["encoder_embedding"][0].index_select(0, new_order) + ] + + if len(encoder_out["src_tokens"]) == 0: + src_tokens = [] + else: + src_tokens = [(encoder_out["src_tokens"][0]).index_select(0, new_order)] + + if len(encoder_out["src_lengths"]) == 0: + src_lengths = [] + else: + src_lengths = [(encoder_out["src_lengths"][0]).index_select(0, new_order)] + + encoder_states = encoder_out["encoder_states"] + if len(encoder_states) > 0: + for idx, state in enumerate(encoder_states): + encoder_states[idx] = state.index_select(1, new_order) + + return { + "encoder_out": new_encoder_out, # T x B x C + "encoder_padding_mask": new_encoder_padding_mask, # B x T + "encoder_embedding": new_encoder_embedding, # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": src_tokens, # B x T + "src_lengths": src_lengths, # B x 1 + } + + def max_positions(self): + """Maximum input length supported by the encoder.""" + if self.embed_positions is None: + return self.max_source_positions + return min(self.max_source_positions, self.embed_positions.max_positions) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + print("deleting {0}".format(weights_key)) + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + for i in range(self.num_layers): + # update layer norms + self.layers[i].upgrade_state_dict_named( + state_dict, "{}.layers.{}".format(name, i) + ) + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + return state_dict + + +class TransformerEncoder(TransformerEncoderBase): + def __init__(self, args, dictionary, embed_tokens): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + use_rel_pos_enc=getattr(args, "use_rel_pos_enc", False), + scaling_for_att=getattr(args, "scaling_for_att", 1.0), + ) + + def build_encoder_layer(self, args): + return super().build_encoder_layer( + TransformerConfig.from_namespace(args), + ) + + +def PositionalEmbedding( + num_embeddings: int, + embedding_dim: int, + padding_idx: int, + learned: bool = False, +): + if learned: + # if padding_idx is specified then offset the embedding ids by + # this index and adjust num_embeddings appropriately + # TODO: The right place for this offset would be inside + # LearnedPositionalEmbedding. Move this there for a cleaner implementation. 
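+        # e.g. num_embeddings=1024 with padding_idx=1 allocates a 1026-row
+        # table, since learned positions start at index padding_idx + 1.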
+ if padding_idx is not None: + num_embeddings = num_embeddings + padding_idx + 1 + m = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5) + if padding_idx is not None: + nn.init.constant_(m.weight[padding_idx], 0) + else: + m = SinusoidalPositionalEmbedding( + embedding_dim, + padding_idx, + init_size=num_embeddings + padding_idx + 1, + ) + return m diff --git a/SpeechT5/Speech2S/speech2s/modules/transformer_layer.py b/SpeechT5/Speech2S/speech2s/modules/transformer_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..a71a848f1a5436756168aafd12d71637520b6b67 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/modules/transformer_layer.py @@ -0,0 +1,330 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/modules/transformer_layer.py + https://github.com/microsoft/SpeechT5/blob/main/Speech2C/speech2c/models/modules/transformer_decoder_layer.py +""" + +from typing import Dict, List, Optional + +import torch +from torch import Tensor +from fairseq.modules import LayerNorm +from fairseq.modules.transformer_layer import TransformerEncoderLayerBase as FairseqTransformerEncoderLayerBase +from fairseq.modules.transformer_layer import TransformerDecoderLayerBase as FairseqTransformerDecoderLayerBase + +from speechut.modules import MultiheadAttention + +class TransformerEncoderLayerBase(FairseqTransformerEncoderLayerBase): + """Encoder layer block. + + In the original paper each operation (multi-head attention or FFN) is + postprocessed with: `dropout -> add residual -> layernorm`. In the + tensor2tensor code they suggest that learning is more robust when + preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.encoder.normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + """ + + def __init__(self, cfg, has_relative_attention_bias=False, scaling_for_att=1.0): + self.scaling_for_att = scaling_for_att + super().__init__(cfg) + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim // cfg.encoder.attention_heads) + + def build_self_attention(self, embed_dim, cfg, scaling_for_att=1.0): + return MultiheadAttention( + embed_dim, + cfg.encoder.attention_heads, + dropout=cfg.attention_dropout, + self_attention=True, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + scaling_for_att=self.scaling_for_att, + ) + + def forward( + self, + x, + encoder_padding_mask: Optional[Tensor], + attn_mask: Optional[Tensor] = None, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, seq_len)` where padding elements are indicated by ``1``. + attn_mask (ByteTensor): binary tensor of shape `(tgt_len, src_len)`, + where `tgt_len` is the length of output and `src_len` is the + length of input, though here both are equal to `seq_len`. + `attn_mask[tgt_i, src_j] = 1` means that when calculating the + embedding for `tgt_i`, we exclude (mask out) `src_j`. 
This is + useful for strided self-attention. + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + # anything in original attn_mask = 1, becomes -1e8 + # anything in original attn_mask = 0, becomes 0 + # Note that we cannot use -inf here, because at some edge cases, + # the attention weight (before softmax) for some padded element in query + # will become -inf, which results in NaN in model parameters + if attn_mask is not None: + attn_mask = attn_mask.masked_fill( + attn_mask.to(torch.bool), -1e8 if x.dtype == torch.float32 else -1e4 + ) + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, _ = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=encoder_padding_mask, + need_weights=False, + attn_mask=attn_mask, + position_bias=pos_bias, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + x = self.fc2(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + return x + + + +class TransformerDecoderLayerBase(FairseqTransformerDecoderLayerBase): + """Decoder layer block. + + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.decoder.normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
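+        has_relative_attention_bias (bool, optional): accept a relative
+            position bias (``pos_bias``) in the self-attention, with an extra
+            ``norm_k`` LayerNorm applied to it (default: False).
+        scaling_for_att (float, optional): extra attention scaling factor
+            forwarded to :class:`MultiheadAttention` (default: 1.0).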
+ """ + + def __init__( + self, cfg, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False, has_relative_attention_bias=False, scaling_for_att=1.0, + ): + self.scaling_for_att = scaling_for_att + super().__init__(cfg, + no_encoder_attn, + add_bias_kv, + add_zero_attn, + ) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim // cfg.decoder.attention_heads) + + def build_self_attention( + self, embed_dim, cfg, add_bias_kv=False, add_zero_attn=False + ): + return MultiheadAttention( + embed_dim, + cfg.decoder.attention_heads, + dropout=cfg.attention_dropout, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=not cfg.cross_self_attention, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + scaling_for_att=self.scaling_for_att, + ) + + def build_encoder_attention(self, embed_dim, cfg): + return MultiheadAttention( + embed_dim, + cfg.decoder.attention_heads, + kdim=cfg.encoder.embed_dim, + vdim=cfg.encoder.embed_dim, + dropout=cfg.attention_dropout, + encoder_decoder_attention=True, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + scaling_for_att=self.scaling_for_att, + ) + + def forward( + self, + x, + encoder_out: Optional[torch.Tensor] = None, + encoder_padding_mask: Optional[torch.Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + prev_self_attn_state: Optional[List[torch.Tensor]] = None, + prev_attn_state: Optional[List[torch.Tensor]] = None, + self_attn_mask: Optional[torch.Tensor] = None, + self_attn_padding_mask: Optional[torch.Tensor] = None, + need_attn: bool = False, + need_head_weights: bool = False, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor, optional): binary + ByteTensor of shape `(batch, src_len)` where padding + elements are indicated by ``1``. + need_attn (bool, optional): return attention weights + need_head_weights (bool, optional): return attention weights + for each head (default: return average over heads). 
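+            self_attn_mask (Tensor, optional): mask applied to the decoder
+                self-attention, e.g. the causal future mask.
+            pos_bias (Tensor, optional): relative positional encoding, passed
+                to the self-attention as ``position_bias`` (default: None).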
+ + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if need_head_weights: + need_attn = True + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + if prev_self_attn_state is not None: + prev_key, prev_value = prev_self_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_self_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_self_attn_state[2] + assert incremental_state is not None + self.self_attn._set_input_buffer(incremental_state, saved_state) + _self_attn_input_buffer = self.self_attn._get_input_buffer(incremental_state) + if self.cross_self_attention and not ( + incremental_state is not None + and _self_attn_input_buffer is not None + and "prev_key" in _self_attn_input_buffer + ): + if self_attn_mask is not None: + assert encoder_out is not None + self_attn_mask = torch.cat( + (x.new_zeros(x.size(0), encoder_out.size(0)), self_attn_mask), dim=1 + ) + if self_attn_padding_mask is not None: + if encoder_padding_mask is None: + assert encoder_out is not None + encoder_padding_mask = self_attn_padding_mask.new_zeros( + encoder_out.size(1), encoder_out.size(0) + ) + self_attn_padding_mask = torch.cat( + (encoder_padding_mask, self_attn_padding_mask), dim=1 + ) + assert encoder_out is not None + y = torch.cat((encoder_out, x), dim=0) + else: + y = x + + x, attn = self.self_attn( + query=x, + key=y, + value=y, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + if self.c_attn is not None: + tgt_len, bsz = x.size(0), x.size(1) + x = x.view(tgt_len, bsz, self.nh, self.head_dim) + x = torch.einsum("tbhd,h->tbhd", x, self.c_attn) + x = x.reshape(tgt_len, bsz, self.embed_dim) + if self.attn_ln is not None: + x = self.attn_ln(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + if self.encoder_attn is not None and encoder_out is not None: + residual = x + if self.normalize_before: + x = self.encoder_attn_layer_norm(x) + if prev_attn_state is not None: + prev_key, prev_value = prev_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_attn_state[2] + assert incremental_state is not None + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=need_attn or (not self.training and self.need_attn), + need_head_weights=need_head_weights, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.encoder_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + if self.ffn_layernorm is not None: + x = self.ffn_layernorm(x) + x = self.fc2(x) + x = self.dropout_module(x) + if self.w_resid is not None: + residual = torch.mul(self.w_resid, residual) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + if self.onnx_trace and 
incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + assert saved_state is not None + if self_attn_padding_mask is not None: + self_attn_state = [ + saved_state["prev_key"], + saved_state["prev_value"], + saved_state["prev_key_padding_mask"], + ] + else: + self_attn_state = [saved_state["prev_key"], saved_state["prev_value"]] + return x, attn, self_attn_state + return x, attn, None + + def make_generation_fast_(self, need_attn: bool = False, **kwargs): + self.need_attn = need_attn diff --git a/SpeechT5/Speech2S/speech2s/modules/w2v_encoder.py b/SpeechT5/Speech2S/speech2s/modules/w2v_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..386f1eb0a4f4f67b552271e65c0b402d197e5bb2 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/modules/w2v_encoder.py @@ -0,0 +1,281 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + wav2vec encoder adding relitive position bias, modified from + https://github.com/microsoft/SpeechT5/blob/main/Speech2C/speech2c/models/modules/transformer_encoder.py + https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/wav2vec/wav2vec2.py +""" + +import math +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.dataclass import ChoiceEnum +from fairseq.modules import ( + LayerNorm, + SamePad, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.transformer_sentence_encoder import init_bert_params +from fairseq.utils import index_put +from fairseq.distributed import fsdp_wrap +from fairseq.models.wav2vec.utils import pad_to_multiple + +## reload multi-head attition with rel-pos-bias +from fairseq.models.wav2vec.wav2vec2 import TransformerEncoder as W2vTransformerEncoder +from speechut.modules import RelativePositionalEncoding +from speechut.modules import MultiheadAttention + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) + + +class TransformerEncoder(W2vTransformerEncoder): + def __init__(self, args): + super().__init__(args) + + self.dropout = args.dropout + self.embedding_dim = args.encoder_embed_dim + self.required_seq_len_multiple = args.required_seq_len_multiple + self.use_rel_pos_enc = getattr(args, "use_rel_pos_enc", False) + + self.pos_conv = nn.Conv1d( + self.embedding_dim, + self.embedding_dim, + kernel_size=args.conv_pos, + padding=args.conv_pos // 2, + groups=args.conv_pos_groups, + ) + dropout = 0 + std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * self.embedding_dim)) + nn.init.normal_(self.pos_conv.weight, mean=0, std=std) + nn.init.constant_(self.pos_conv.bias, 0) + + self.pos_conv = nn.utils.weight_norm(self.pos_conv, name="weight", dim=2) + self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU()) + + layers = [] + for _ in range(args.encoder_layers): + layer = TransformerSentenceEncoderLayer( + embedding_dim=self.embedding_dim, + ffn_embedding_dim=args.encoder_ffn_embed_dim, + num_attention_heads=args.encoder_attention_heads, + dropout=self.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + 
layer_norm_first=args.layer_norm_first, + has_relative_attention_bias=self.use_rel_pos_enc, + ) + if args.checkpoint_activations: + layer = fsdp_wrap(layer) + layer = checkpoint_wrapper(layer) + layers.append(layer) + self.layers = nn.ModuleList(layers) + + self.layer_norm_first = args.layer_norm_first + self.layer_norm = LayerNorm(self.embedding_dim) + self.layerdrop = args.encoder_layerdrop + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(args.encoder_embed_dim // args.encoder_attention_heads, 160) + + + self.apply(init_bert_params) + + def forward(self, x, padding_mask=None, layer=None): + x, layer_results = self.extract_features(x, padding_mask, layer) + + if self.layer_norm_first and layer is None: + x = self.layer_norm(x) + + return x, layer_results + + def extract_features(self, x, padding_mask=None, tgt_layer=None): + + if padding_mask is not None: + x = index_put(x, padding_mask, 0) + + x_conv = self.pos_conv(x.transpose(1, 2)) + x_conv = x_conv.transpose(1, 2) + x = x + x_conv + + if not self.layer_norm_first: + x = self.layer_norm(x) + + # pad to the sequence length dimension + x, pad_length = pad_to_multiple( + x, self.required_seq_len_multiple, dim=-2, value=0 + ) + if pad_length > 0 and padding_mask is None: + padding_mask = x.new_zeros((x.size(0), x.size(1)), dtype=torch.bool) + padding_mask[:, -pad_length:] = True + else: + padding_mask, _ = pad_to_multiple( + padding_mask, self.required_seq_len_multiple, dim=-1, value=True + ) + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + if self.use_rel_pos_enc: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + layer_results = [] + r = None + for i, layer in enumerate(self.layers): + dropout_probability = np.random.random() + if not self.training or (dropout_probability > self.layerdrop): + x, z = layer(x, self_attn_padding_mask=padding_mask, need_weights=False, pos_bias=pos_k) + if tgt_layer is not None: + # unpad if needed + if pad_length > 0: + layer_results.append( + ( + x[:-pad_length], + z[:, :-pad_length, :-pad_length] + if z is not None + else z, + ) + ) + else: + layer_results.append((x, z)) + if i == tgt_layer: + r = x + break + + if r is not None: + x = r + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + # undo paddding + if pad_length > 0: + x = x[:, :-pad_length] + + return x, layer_results + + +class TransformerSentenceEncoderLayer(nn.Module): + """ + Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained + models. 
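+
+    This variant can additionally take a relative position bias (``pos_bias``)
+    that is passed to the self-attention as ``position_bias``; when
+    ``has_relative_attention_bias`` is True, an extra ``norm_k`` LayerNorm is
+    created for normalizing that bias.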
+ """ + + def __init__( + self, + embedding_dim: float = 768, + ffn_embedding_dim: float = 3072, + num_attention_heads: float = 8, + dropout: float = 0.1, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + activation_fn: str = "relu", + layer_norm_first: bool = False, + has_relative_attention_bias: bool = False, + ) -> None: + + super().__init__() + # Initialize parameters + self.embedding_dim = embedding_dim + self.dropout = dropout + self.activation_dropout = activation_dropout + + # Initialize blocks + self.activation_fn = utils.get_activation_fn(activation_fn) + self.self_attn = MultiheadAttention( + self.embedding_dim, + num_attention_heads, + dropout=attention_dropout, + self_attention=True, + ) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(self.activation_dropout) + self.dropout3 = nn.Dropout(dropout) + + self.layer_norm_first = layer_norm_first + + # layer norm associated with the self attention layer + self.self_attn_layer_norm = LayerNorm(self.embedding_dim) + self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim) + self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim) + + # layer norm associated with the position wise feed-forward NN + self.final_layer_norm = LayerNorm(self.embedding_dim) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embedding_dim//num_attention_heads) + + def forward( + self, + x: torch.Tensor, + self_attn_mask: torch.Tensor = None, + self_attn_padding_mask: torch.Tensor = None, + need_weights: bool = False, + att_args=None, + pos_bias=None, + ): + """ + LayerNorm is applied either before or after the self-attention/ffn + modules similar to the original Transformer imlementation. + """ + residual = x + + if self.layer_norm_first: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + x = self.dropout1(x) + x = residual + x + + residual = x + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + else: + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + position_bias=pos_bias, + ) + + x = self.dropout1(x) + x = residual + x + + x = self.self_attn_layer_norm(x) + + residual = x + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + x = self.final_layer_norm(x) + + return x, attn diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/base_speechut_for_asr.sh b/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/base_speechut_for_asr.sh new file mode 100644 index 0000000000000000000000000000000000000000..d5bc7311331208c3f2f65c17586c73ee63cd98f0 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/base_speechut_for_asr.sh @@ -0,0 +1,40 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 2 ] && echo "Usage: $0 [mount=${PWD}] [world_size=32] [update_freq=1]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +mount=$3 +world_size=$4 +update_freq=$5 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/base_speechut4asr_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/pretrain \ + --config-name speechut_base_librispeech \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + dataset.train_subset=\"train_960+pseudo_libritext.kmu-ltr+merge_960.kmu-none\" \ + dataset.valid_subset=\"dev_clean+dev.kmu-ltr+dev.kmu-none\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1400000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=base_speechut4asr_${world_size}gpu_${update_freq}accum diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/base_speechut_for_st.sh b/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/base_speechut_for_st.sh new file mode 100644 index 0000000000000000000000000000000000000000..438a43f55275938c51faefab181dacc1af3567d0 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/base_speechut_for_st.sh @@ -0,0 +1,47 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [mount=${PWD}] [world_size=32] [update_freq=1]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +lang=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/base_speechut4en${lang}_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/pretrain \ + --config-name speechut_base_librispeech \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + model.add_text_ctc=false \ + model.text_transformer.share_decoder_input_output_embed=true \ + criterion.u2t_ed_weight=1.0 \ + criterion.u2t_ctc_weight=0 \ + \ + dataset.train_subset=\"train_960,mustcuns_${lang}+pseudo_wmt_en${lang}.kmu-spm+train_960.kmu-none,mustcuns_${lang}.kmu-none\" \ + dataset.valid_subset=\"dev_clean+pseudo_valid.kmu-spm+dev.kmu-none\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1400000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=base_speechut4en${lang}_${world_size}gpu_${update_freq}accum + diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/base_speechut_for_st_enfr.sh b/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/base_speechut_for_st_enfr.sh new file mode 100644 index 0000000000000000000000000000000000000000..c0c7217d0c124e603bb3b95ff11b7e7e462290c0 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/base_speechut_for_st_enfr.sh @@ -0,0 +1,48 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [lang=fr] [mount=${PWD}] [world_size=32] [update_freq=1]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +lang=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $lang ] && lang=fr +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/base_speechut4en${lang}_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/pretrain \ + --config-name speechut_base_librispeech \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + model.add_text_ctc=false \ + criterion.u2t_ed_weight=1.0 \ + criterion.u2t_ctc_weight=0 \ + \ + dataset.train_subset=\"train_960,pretrain_mustc+pseudo_wmt14_enfr.kmu-spm+train_960.kmu-none,pretrain_mustc.kmu-none\" \ + dataset.valid_subset=\"dev_clean+pseudo_valid.kmu-spm+dev.kmu-none\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1400000 \ + optimization.max_update=600000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=base_speechut4en${lang}_${world_size}gpu_${update_freq}accum + diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/large_speechut_for_asr.sh b/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/large_speechut_for_asr.sh new file mode 100644 index 0000000000000000000000000000000000000000..e9d64d789ed0421252edd71aa9c8268a42dc42f3 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/pretrain_speechut/large_speechut_for_asr.sh @@ -0,0 +1,41 @@ +# #################################### +# SpeechUT Large model # +# #################################### +[ $# -lt 2 ] && echo "Usage: $0 [mount=${PWD}] [world_size=32] [update_freq=4]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +mount=$3 +world_size=$4 +update_freq=$5 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=4 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/large_speechut4asr_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/pretrain \ + --config-name speechut_large_librilight \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + dataset.train_subset=\"train_small+pseudo_libritext.kmu-ltr\" \ + dataset.valid_subset=\"dev_clean+dev.kmu-ltr\" \ + dataset.num_workers=0 \ + dataset.max_tokens=900000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=large_speechut4asr_${world_size}gpu_${update_freq}accum + \ No newline at end of file diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune960h_large_edctc.sh b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune960h_large_edctc.sh new file mode 100644 index 0000000000000000000000000000000000000000..08a25818bc9fc519e65fa175886545a8650c0906 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune960h_large_edctc.sh @@ -0,0 +1,45 @@ +# #################################### +# SpeechUT Large model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=3]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 + +w2v_path=$1 +DATA_DIR=$2 +cpt=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=3 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="${mount}/exp/finetune_asr/$exp_name/960h_edctc80k_from_${cpt}_bz3.3m_lr1e-5" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/finetune_asr \ + --config-name speechut_large_960h \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + model.w2v_path=${w2v_path} \ + \ + optimization.lr=[0.00001] \ + optimization.max_update=80000 \ + dataset.max_tokens=1100000 \ + optimization.update_freq=[${update_freq}] \ + distributed_training.distributed_world_size=${world_size} \ + \ + dataset.train_subset="train_960" \ + dataset.valid_subset="dev_other" \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=960h_edctc80k_from_${cpt}_bz3.3m_lr1e-5 diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune_base_edctc.sh b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune_base_edctc.sh new file mode 100644 index 0000000000000000000000000000000000000000..cad7bd0a11336a2b5e0c34372d57b7b4b953a414 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune_base_edctc.sh @@ -0,0 +1,45 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=2]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 + +w2v_path=$1 +DATA_DIR=$2 +cpt=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=2 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="${mount}/exp/finetune_asr/$exp_name/edctc40k_from_${cpt}_bz2.6m_lr1e-5" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/finetune_asr \ + --config-name speechut_base_100h \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + model.w2v_path=${w2v_path} \ + \ + optimization.lr=[0.00001] \ + optimization.max_update=40000 \ + dataset.max_tokens=1300000 \ + optimization.update_freq=[${update_freq}] \ + distributed_training.distributed_world_size=${world_size} \ + \ + dataset.train_subset="train_clean_100" \ + dataset.valid_subset="dev_other" \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=edctc40k_from_${cpt}_bz2.6m_lr1e-5 diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctc.sh b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctc.sh new file mode 100644 index 0000000000000000000000000000000000000000..9dce06398c476a26290839b7f3a8f8632a5060e0 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctc.sh @@ -0,0 +1,61 @@ +##################################### +# SpeechUT ASR model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_other] [beam_size=10] [ctc_weight=0.2] [--normalize]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +beam_size=$4 +ctc_weight=$5 +extra=$6 +[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..." +[ -z $gen_set ] && gen_set="dev_other" +[ -z $beam_size ] && beam_size=10 +[ -z $ctc_weight ] && ctc_weight=0.2 +[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 as no ctc-decoding used..." && beam_size=1 +[ $ctc_weight != 0 ] && extra="$extra --batch-size 1" + +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechut \ + --label-dir ${DATA_DIR} \ + --labels '["ltr"]' \ + --single-target \ + --post-process letter \ + --gen-subset ${subset} \ + --max-tokens 2000000 \ + \ + --task joint_sc2t_pretraining \ + --add-decoder-target \ + --fine-tuning \ + --pad-audio \ + --random-crop \ + \ + --ctc-weight ${ctc_weight} $extra \ + --beam ${beam_size} \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring wer --max-len-a 0.00078125 --max-len-b 200 \ + & +done +wait + + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank} + echo $results_path + tail -n 1 $results_path/generate-*.txt +done diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctclm.sh b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctclm.sh new file mode 100644 index 0000000000000000000000000000000000000000..dadd1a4286de52cef0250640ef64fd4117e11ecb --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctclm.sh @@ -0,0 +1,66 @@ +##################################### +# SpeechUT ASR model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_other] [beam_size=30] [ctc_weight=0.3] [lm_weight=0.7] [lm_path] [--normalize]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +beam_size=$4 +ctc_weight=$5 +lm_weight=$6 +lm_path=$7 +extra=$8 +[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..." +[ -z $gen_set ] && gen_set="dev_other" +[ -z $beam_size ] && beam_size=30 +[ -z $ctc_weight ] && ctc_weight=0.3 +[ -z $lm_weight ] && lm_weight=0.7 +[ -z $lm_path ] && lm_path="/mnt/default/v-junyiao/librispeech/lm/lm_ctc_form/checkpoint_best.pt" +[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 and lm_weight to 0 as no ctc-decoding used..." && beam_size=1 && lm_weight=0 +[ $ctc_weight != 0 ] && extra="$extra --batch-size 1" + +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}/${subset}_${world_size}_${rank} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechut \ + --label-dir ${DATA_DIR} \ + --labels '["ltr"]' \ + --single-target \ + --post-process letter \ + --gen-subset ${subset} \ + --max-tokens 800000 \ + \ + --task joint_sc2t_pretraining \ + --add-decoder-target \ + --fine-tuning \ + --pad-audio \ + --random-crop \ + \ + --ctc-weight ${ctc_weight} $extra \ + --lm-weight ${lm_weight} --lm-path ${lm_path} \ + --beam ${beam_size} \ + \ + --path ${model_path} \ + --results-path ${results_path} \ + \ + --scoring wer --max-len-a 0.00078125 --max-len-b 200 \ + & +done +wait + + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}/${subset}_${world_size}_${rank} + echo $results_path + tail -n 1 $results_path/generate-*.txt +done diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_lm_nj.sh b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_lm_nj.sh new file mode 100644 index 0000000000000000000000000000000000000000..a5627a59975a01736907a5cc3fb76df335709b43 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_lm_nj.sh @@ -0,0 +1,74 @@ +##################################### +# SpeechUT ASR model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_other] [beam_size=30] [ctc_weight=0.3] [lm_weight=0.7] [lm_path] [nj=8] [ngpu=8] [--normalize]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +beam_size=$4 +ctc_weight=$5 +lm_weight=$6 +lm_path=$7 +nj=$8 +ngpu=$9 +extra=${10} +[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..." +[ -z $gen_set ] && gen_set="dev_other" +[ -z $beam_size ] && beam_size=30 +[ -z $ctc_weight ] && ctc_weight=0.3 +[ -z $lm_weight ] && lm_weight=0.7 +[ -z $lm_path ] && lm_path="/mnt/default/v-junyiao/librispeech/lm/lm_ctc_form/checkpoint_best.pt" +[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 and lm_weight to 0 as no ctc-decoding used..." && beam_size=1 && lm_weight=0 +[ $ctc_weight != 0 ] && extra="$extra --batch-size 1" +[ -z $nj ] && nj=8 +[ -z $ngpu ] && ngpu=8 + +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +world_size=$nj +for rank in $(seq 0 $((nj - 1))); do + export CUDA_VISIBLE_DEVICES=$((rank % $ngpu)) + for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}/${subset}_${world_size}_${rank} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechut \ + --label-dir ${DATA_DIR} \ + --labels '["ltr"]' \ + --single-target \ + --post-process letter \ + --gen-subset ${subset} \ + --max-tokens 800000 \ + \ + --task joint_sc2t_pretraining \ + --add-decoder-target \ + --fine-tuning \ + --pad-audio \ + --random-crop \ + \ + --ctc-weight ${ctc_weight} $extra \ + --lm-weight ${lm_weight} --lm-path ${lm_path} \ + --beam ${beam_size} \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring wer --max-len-a 0.00078125 --max-len-b 200 \ + --distributed-world-size ${world_size} --distributed-rank ${rank} \ + & + done +done +wait + + +for subset in ${gen_set//,/ }; do + results_dir=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight} + cat $results_dir/${subset}_${world_size}_*/generate-${subset}.txt | grep -v "^Generate" > $results_dir/generate-${subset}.all.txt +done diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_nj.sh b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_nj.sh new file mode 100644 index 0000000000000000000000000000000000000000..08e6df431c9856f24122118017b8ae85bacc5444 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_nj.sh @@ -0,0 +1,69 @@ +##################################### +# SpeechUT ASR model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_other] [beam_size=10] [ctc_weight=0.2] [nj=32] [ngpu=8] [--normalize]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +beam_size=$4 +ctc_weight=$5 +nj=$6 +ngpu=$7 +extra=$8 +[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..." +[ -z $gen_set ] && gen_set="dev_other" +[ -z $beam_size ] && beam_size=10 +[ -z $ctc_weight ] && ctc_weight=0.2 +[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 as no ctc-decoding used..." && beam_size=1 +[ $ctc_weight != 0 ] && extra="$extra --batch-size 1" +[ -z $nj ] && nj=32 +[ -z $ngpu ] && ngpu=8 + +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +world_size=$nj +for rank in $(seq 0 $((nj - 1))); do + export CUDA_VISIBLE_DEVICES=$((rank % $ngpu)) + for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechut \ + --label-dir ${DATA_DIR} \ + --labels '["ltr"]' \ + --single-target \ + --post-process letter \ + --gen-subset ${subset} \ + --max-tokens 2000000 \ + \ + --task joint_sc2t_pretraining \ + --add-decoder-target \ + --fine-tuning \ + --pad-audio \ + --random-crop \ + \ + --ctc-weight ${ctc_weight} $extra \ + --beam ${beam_size} \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring wer --max-len-a 0.00078125 --max-len-b 200 \ + --distributed-world-size ${world_size} --distributed-rank ${rank} \ + & + done +done +wait + + +for subset in ${gen_set//,/ }; do + results_dir=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight} + cat $results_dir/${subset}_${world_size}_*/generate-${subset}.txt | grep -v "^Generate" > $results_dir/generate-${subset}.all.txt +done diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_st/finetune_base_mustc_enxx.sh b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_st/finetune_base_mustc_enxx.sh new file mode 100644 index 0000000000000000000000000000000000000000..59c8a2a0346b708894b1568fa691c062537aa559 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_st/finetune_base_mustc_enxx.sh @@ -0,0 +1,77 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 4 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=4/6]" && exit 0 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +w2v_path=$1 +DATA_DIR=$2 +lang=$3 +cpt=$4 +mount=$5 +world_size=$6 +update_freq=$7 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=4 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="$mount/exp/finetune_mustc/$exp_name/legacy_en${lang}_from_${cpt}_bz3.2m_lr3e-5" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +max_tokens=800000 +python $CODE_ROOT/fairseq/fairseq_cli/train.py ${DATA_DIR} \ + --save-dir ${MODEL_DIR} \ + --user-dir $CODE_ROOT/speechut \ + --task speech_to_text \ + --config-yaml config_en${lang}.yaml \ + --train-subset "train_st" \ + --valid-subset "dev_st" \ + --fp16 \ + --seed 1 \ + \ + --ddp-backend no_c10d \ + --distributed-world-size ${world_size} \ + --tensorboard-logdir ${MODEL_DIR} \ + \ + --criterion label_smoothed_cross_entropy --report-accuracy \ + --label-smoothing 0.3 \ + \ + --optimizer adam \ + --clip-norm 1.0 \ + --lr 3e-05 \ + --lr-scheduler polynomial_decay --warmup-updates 5000 \ + --max-update 50000 \ + --total-num-update 50000 \ + --update-freq ${update_freq} \ + \ + --max-tokens ${max_tokens} \ + --max-sentences 16 \ + --max-tokens-valid ${max_tokens} \ + --grouped-shuffling \ + --max-source-positions ${max_tokens} \ + --skip-invalid-size-inputs-valid-test \ + --num-workers 0 \ + --best-checkpoint-metric "accuracy" \ + --maximize-best-checkpoint-metric \ + \ + --arch "speechut_st_legacy" \ + --w2v-path ${w2v_path} \ + --layerdrop 0.1 \ + --activation-dropout 0.1 \ + --attention-dropout 0.1 \ + --feature-grad-mult 1.0 \ + \ + --apply-mask --mask-prob 0.5 \ + \ + --log-format json \ + --log-interval 100 \ + --save-interval 1 \ + --keep-last-epochs 5 \ + --keep-best-checkpoints 5 \ + \ + 2>&1 | tee ${MODEL_DIR}/train_en${lang}.log + diff --git a/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_st/inference_st.sh 
b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_st/inference_st.sh new file mode 100644 index 0000000000000000000000000000000000000000..3aefa10e360f57dbf66cff9d84c800b4da89619f --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts copy/tune_speechut_st/inference_st.sh @@ -0,0 +1,44 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [gen-set=dev] [beam_size=10] [lenpen=1.0]" && exit 0 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +lang=$3 +gen_set=$4 +beam_size=$5 +lenpen=$6 +[ -z $gen_set ] && gen_set="dev" +[ -z $beam_size ] && beam_size=10 +[ -z $lenpen ] && lenpen=1 +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} +results_path=$src_dir/decode_${cpt}_beam${beam_size}/${gen_set} +[ ! -d $results_path ] && mkdir -p $results_path + +python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --gen-subset ${gen_set}_st \ + --max-tokens 2000000 \ + --max-source-positions 2000000 \ + --num-workers 0 \ + \ + --user-dir $CODE_ROOT/speechut \ + --task speech_to_text \ + --config-yaml config_en${lang}.yaml \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring sacrebleu --max-len-a 0 --max-len-b 512 \ + --beam ${beam_size} \ + --lenpen $lenpen \ + # --model-overrides "{'model':{'w2v_path':'/path/to/your/pretrained/model.pt'}}" \ + + echo $results_path + tail -n 1 $results_path/generate-*.txt + sleep 1s diff --git a/SpeechT5/Speech2S/speech2s/scripts/__init__.py b/SpeechT5/Speech2S/speech2s/scripts/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SpeechT5/Speech2S/speech2s/scripts/average_checkpoints.py b/SpeechT5/Speech2S/speech2s/scripts/average_checkpoints.py new file mode 100644 index 0000000000000000000000000000000000000000..a4711e4840a45118c9e28d0258f89fe64e964cf3 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/average_checkpoints.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import collections +import os +import re + +import torch +from fairseq.file_io import PathManager + + +def average_checkpoints(inputs): + """Loads checkpoints from inputs and returns a model with averaged weights. + + Args: + inputs: An iterable of string paths of checkpoints to load from. + + Returns: + A dict of string keys mapping to various values. The 'model' key + from the returned dict should correspond to an OrderedDict mapping + string parameter names to torch Tensors. 
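+
+    Note: half-precision tensors are upcast to float before accumulation;
+    floating-point parameters are divided by the number of checkpoints,
+    while integer tensors are averaged with floor division.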
+ """ + params_dict = collections.OrderedDict() + params_keys = None + new_state = None + num_models = len(inputs) + + for fpath in inputs: + with PathManager.open(fpath, "rb") as f: + state = torch.load( + f, + map_location=( + lambda s, _: torch.serialization.default_restore_location(s, "cpu") + ), + ) + # Copies over the settings from the first checkpoint + if new_state is None: + new_state = state + + model_params = state["model"] + + model_params_keys = list(model_params.keys()) + if params_keys is None: + params_keys = model_params_keys + elif params_keys != model_params_keys: + raise KeyError( + "For checkpoint {}, expected list of params: {}, " + "but found: {}".format(f, params_keys, model_params_keys) + ) + + for k in params_keys: + p = model_params[k] + if isinstance(p, torch.HalfTensor): + p = p.float() + if k not in params_dict: + params_dict[k] = p.clone() + # NOTE: clone() is needed in case of p is a shared parameter + else: + params_dict[k] += p + + averaged_params = collections.OrderedDict() + for k, v in params_dict.items(): + averaged_params[k] = v + if averaged_params[k].is_floating_point(): + averaged_params[k].div_(num_models) + else: + averaged_params[k] //= num_models + new_state["model"] = averaged_params + return new_state + + +def last_n_checkpoints(paths, n, update_based, upper_bound=None): + assert len(paths) == 1 + path = paths[0] + if update_based: + pt_regexp = re.compile(r"checkpoint_\d+_(\d+)\.pt") + else: + pt_regexp = re.compile(r"checkpoint(\d+)\.pt") + files = PathManager.ls(path) + + entries = [] + for f in files: + m = pt_regexp.fullmatch(f) + if m is not None: + sort_key = int(m.group(1)) + if upper_bound is None or sort_key <= upper_bound: + entries.append((sort_key, m.group(0))) + if len(entries) < n: + raise Exception( + "Found {} checkpoint files but need at least {}", len(entries), n + ) + return [os.path.join(path, x[1]) for x in sorted(entries, reverse=True)[:n]] + + +def main(): + parser = argparse.ArgumentParser( + description="Tool to average the params of input checkpoints to " + "produce a new checkpoint", + ) + # fmt: off + parser.add_argument('--inputs', required=True, nargs='+', + help='Input checkpoint file paths.') + parser.add_argument('--output', required=True, metavar='FILE', + help='Write the new checkpoint containing the averaged weights to this path.') + num_group = parser.add_mutually_exclusive_group() + num_group.add_argument('--num-epoch-checkpoints', type=int, + help='if set, will try to find checkpoints with names checkpoint_xx.pt in the ' + 'path specified by input, and average last this many of them.') + num_group.add_argument('--num-update-checkpoints', type=int, + help='if set, will try to find checkpoints with names checkpoint_ee_xx.pt in the path specified by' + ' input, and average last this many of them.') + parser.add_argument('--checkpoint-upper-bound', type=int, + help='when using --num-epoch-checkpoints, this will set an upper bound on which epoch to use, ' + 'when using --num-update-checkpoints, this will set an upper bound on which update to use' + 'e.g., with --num-epoch-checkpoints=10 --checkpoint-upper-bound=50, checkpoints 41-50 would be' + ' averaged.' 
+ 'e.g., with --num-update-checkpoints=10 --checkpoint-upper-bound=50000, checkpoints 40500-50000 would' + ' be averaged assuming --save-interval-updates 500' + ) + # fmt: on + args = parser.parse_args() + print(args) + + num = None + is_update_based = False + if args.num_update_checkpoints is not None: + num = args.num_update_checkpoints + is_update_based = True + elif args.num_epoch_checkpoints is not None: + num = args.num_epoch_checkpoints + + assert args.checkpoint_upper_bound is None or ( + args.num_epoch_checkpoints is not None + or args.num_update_checkpoints is not None + ), "--checkpoint-upper-bound requires --num-epoch-checkpoints or --num-update-checkpoints" + assert ( + args.num_epoch_checkpoints is None or args.num_update_checkpoints is None + ), "Cannot combine --num-epoch-checkpoints and --num-update-checkpoints" + + if num is not None: + args.inputs = last_n_checkpoints( + args.inputs, + num, + is_update_based, + upper_bound=args.checkpoint_upper_bound, + ) + print("averaging checkpoints: ", args.inputs) + + new_state = average_checkpoints(args.inputs) + with PathManager.open(args.output, "wb") as f: + torch.save(new_state, f) + print("Finished writing averaged checkpoint to {}".format(args.output)) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/build_sym_alignment.py b/SpeechT5/Speech2S/speech2s/scripts/build_sym_alignment.py new file mode 100644 index 0000000000000000000000000000000000000000..0ca5c18f7bd4b0fbf58b203793506ca395466129 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/build_sym_alignment.py @@ -0,0 +1,97 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Use this script in order to build symmetric alignments for your translation +dataset. +This script depends on fast_align and mosesdecoder tools. You will need to +build those before running the script. +fast_align: + github: http://github.com/clab/fast_align + instructions: follow the instructions in README.md +mosesdecoder: + github: http://github.com/moses-smt/mosesdecoder + instructions: http://www.statmt.org/moses/?n=Development.GetStarted +The script produces the following files under --output_dir: + text.joined - concatenation of lines from the source_file and the + target_file. + align.forward - forward pass of fast_align. + align.backward - backward pass of fast_align. + aligned.sym_heuristic - symmetrized alignment. 
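+
+Example invocation (illustrative; all paths below are placeholders):
+  python build_sym_alignment.py \
+      --fast_align_dir /path/to/fast_align/build \
+      --mosesdecoder_dir /path/to/mosesdecoder \
+      --source_file train.en --target_file train.de \
+      --output_dir alignments/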
+""" + +import argparse +import os +from itertools import zip_longest + + +def main(): + parser = argparse.ArgumentParser(description="symmetric alignment builer") + # fmt: off + parser.add_argument('--fast_align_dir', + help='path to fast_align build directory') + parser.add_argument('--mosesdecoder_dir', + help='path to mosesdecoder root directory') + parser.add_argument('--sym_heuristic', + help='heuristic to use for symmetrization', + default='grow-diag-final-and') + parser.add_argument('--source_file', + help='path to a file with sentences ' + 'in the source language') + parser.add_argument('--target_file', + help='path to a file with sentences ' + 'in the target language') + parser.add_argument('--output_dir', + help='output directory') + # fmt: on + args = parser.parse_args() + + fast_align_bin = os.path.join(args.fast_align_dir, "fast_align") + symal_bin = os.path.join(args.mosesdecoder_dir, "bin", "symal") + sym_fast_align_bin = os.path.join( + args.mosesdecoder_dir, "scripts", "ems", "support", "symmetrize-fast-align.perl" + ) + + # create joined file + joined_file = os.path.join(args.output_dir, "text.joined") + with open(args.source_file, "r", encoding="utf-8") as src, open( + args.target_file, "r", encoding="utf-8" + ) as tgt: + with open(joined_file, "w", encoding="utf-8") as joined: + for s, t in zip_longest(src, tgt): + print("{} ||| {}".format(s.strip(), t.strip()), file=joined) + + bwd_align_file = os.path.join(args.output_dir, "align.backward") + + # run forward alignment + fwd_align_file = os.path.join(args.output_dir, "align.forward") + fwd_fast_align_cmd = "{FASTALIGN} -i {JOINED} -d -o -v > {FWD}".format( + FASTALIGN=fast_align_bin, JOINED=joined_file, FWD=fwd_align_file + ) + assert os.system(fwd_fast_align_cmd) == 0 + + # run backward alignment + bwd_align_file = os.path.join(args.output_dir, "align.backward") + bwd_fast_align_cmd = "{FASTALIGN} -i {JOINED} -d -o -v -r > {BWD}".format( + FASTALIGN=fast_align_bin, JOINED=joined_file, BWD=bwd_align_file + ) + assert os.system(bwd_fast_align_cmd) == 0 + + # run symmetrization + sym_out_file = os.path.join(args.output_dir, "aligned") + sym_cmd = "{SYMFASTALIGN} {FWD} {BWD} {SRC} {TGT} {OUT} {HEURISTIC} {SYMAL}".format( + SYMFASTALIGN=sym_fast_align_bin, + FWD=fwd_align_file, + BWD=bwd_align_file, + SRC=args.source_file, + TGT=args.target_file, + OUT=sym_out_file, + HEURISTIC=args.sym_heuristic, + SYMAL=symal_bin, + ) + assert os.system(sym_cmd) == 0 + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/compare_namespaces.py b/SpeechT5/Speech2S/speech2s/scripts/compare_namespaces.py new file mode 100644 index 0000000000000000000000000000000000000000..bc24db624f8db36f546c263ba3a806dae6d466bf --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/compare_namespaces.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +"""Helper script to compare two argparse.Namespace objects.""" + +from argparse import Namespace # noqa + + +def main(): + + ns1 = eval(input("Namespace 1: ")) + ns2 = eval(input("Namespace 2: ")) + + def keys(ns): + ks = set() + for k in dir(ns): + if not k.startswith("_"): + ks.add(k) + return ks + + k1 = keys(ns1) + k2 = keys(ns2) + + def print_keys(ks, ns1, ns2=None): + for k in ks: + if ns2 is None: + print("{}\t{}".format(k, getattr(ns1, k, None))) + else: + print( + "{}\t{}\t{}".format(k, getattr(ns1, k, None), getattr(ns2, k, None)) + ) + + print("Keys unique to namespace 1:") + print_keys(k1 - k2, ns1) + print() + + print("Keys unique to namespace 2:") + print_keys(k2 - k1, 
ns2) + print() + + print("Overlapping keys with different values:") + ks = [k for k in k1 & k2 if getattr(ns1, k, "None") != getattr(ns2, k, "None")] + print_keys(ks, ns1, ns2) + print() + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/compound_split_bleu.sh b/SpeechT5/Speech2S/speech2s/scripts/compound_split_bleu.sh new file mode 100644 index 0000000000000000000000000000000000000000..1972fddcebff9a43a70bcf14c287175c68f60e3f --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/compound_split_bleu.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +if [ $# -ne 1 ]; then + echo "usage: $0 GENERATE_PY_OUTPUT" + exit 1 +fi + +GEN=$1 + +SYS=$GEN.sys +REF=$GEN.ref + +if [ $(tail -n 1 $GEN | grep BLEU | wc -l) -ne 1 ]; then + echo "not done generating" + exit +fi + +grep ^H $GEN | awk -F '\t' '{print $NF}' | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $SYS +grep ^T $GEN | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $REF +fairseq-score --sys $SYS --ref $REF diff --git a/SpeechT5/Speech2S/speech2s/scripts/constraints/extract.py b/SpeechT5/Speech2S/speech2s/scripts/constraints/extract.py new file mode 100644 index 0000000000000000000000000000000000000000..437b373856966e568ca93c13ebbd1417291e49da --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/constraints/extract.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +"""Extracts random constraints from reference files.""" + +import argparse +import random +import sys + + +def get_phrase(words, index, length): + assert index < len(words) - length + 1 + phr = " ".join(words[index : index + length]) + for i in range(index, index + length): + words.pop(index) + return phr + + +def main(args): + + if args.seed: + random.seed(args.seed) + + for line in sys.stdin: + constraints = [] + + def add_constraint(constraint): + constraints.append(constraint) + + source = line.rstrip() + if "\t" in line: + source, target = line.split("\t") + if args.add_sos: + target = f" {target}" + if args.add_eos: + target = f"{target} " + + if len(target.split()) >= args.len: + words = [target] + + num = args.number + + choices = {} + for i in range(num): + if len(words) == 0: + break + segmentno = random.choice(range(len(words))) + segment = words.pop(segmentno) + tokens = segment.split() + phrase_index = random.choice(range(len(tokens))) + choice = " ".join( + tokens[phrase_index : min(len(tokens), phrase_index + args.len)] + ) + for j in range( + phrase_index, min(len(tokens), phrase_index + args.len) + ): + tokens.pop(phrase_index) + if phrase_index > 0: + words.append(" ".join(tokens[0:phrase_index])) + if phrase_index + 1 < len(tokens): + words.append(" ".join(tokens[phrase_index:])) + choices[target.find(choice)] = choice + + # mask out with spaces + target = target.replace(choice, " " * len(choice), 1) + + for key in sorted(choices.keys()): + add_constraint(choices[key]) + + print(source, *constraints, sep="\t") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--number", "-n", type=int, default=1, help="number of phrases") + parser.add_argument("--len", "-l", type=int, default=1, help="phrase length") + parser.add_argument( + "--add-sos", default=False, action="store_true", help="add token" + ) + parser.add_argument( + "--add-eos", default=False, action="store_true", help="add token" + ) + 
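+    # A non-zero --seed makes the random phrase sampling above reproducible.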
parser.add_argument("--seed", "-s", default=0, type=int) + args = parser.parse_args() + + main(args) diff --git a/SpeechT5/Speech2S/speech2s/scripts/constraints/validate.py b/SpeechT5/Speech2S/speech2s/scripts/constraints/validate.py new file mode 100644 index 0000000000000000000000000000000000000000..d531ad9f39b1df42c98fe8f26ad61fe53a9ac0c5 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/constraints/validate.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import sys + + +"""Reads in a fairseq output file, and verifies that the constraints +(C- lines) are present in the output (the first H- line). Assumes that +constraints are listed prior to the first hypothesis. +""" + +constraints = [] +found = 0 +total = 0 +for line in sys.stdin: + if line.startswith("C-"): + constraints.append(line.rstrip().split("\t")[1]) + elif line.startswith("H-"): + text = line.split("\t")[2] + + for constraint in constraints: + total += 1 + if constraint in text: + found += 1 + else: + print(f"No {constraint} in {text}", file=sys.stderr) + + constraints = [] + +print(f"Found {found} / {total} = {100 * found / total:.1f}%") diff --git a/SpeechT5/Speech2S/speech2s/scripts/convert_dictionary.lua b/SpeechT5/Speech2S/speech2s/scripts/convert_dictionary.lua new file mode 100644 index 0000000000000000000000000000000000000000..14ee8c997f642c8ff196617c2dcd0584037a60c4 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/convert_dictionary.lua @@ -0,0 +1,34 @@ +-- Copyright (c) Facebook, Inc. and its affiliates. +-- +-- This source code is licensed under the MIT license found in the +-- LICENSE file in the root directory of this source tree. +-- +-- Usage: convert_dictionary.lua +require 'fairseq' +require 'torch' +require 'paths' + +if #arg < 1 then + print('usage: convert_dictionary.lua ') + os.exit(1) +end +if not paths.filep(arg[1]) then + print('error: file does not exit: ' .. arg[1]) + os.exit(1) +end + +dict = torch.load(arg[1]) +dst = paths.basename(arg[1]):gsub('.th7', '.txt') +assert(dst:match('.txt$')) + +f = io.open(dst, 'w') +for idx, symbol in ipairs(dict.index_to_symbol) do + if idx > dict.cutoff then + break + end + f:write(symbol) + f:write(' ') + f:write(dict.index_to_freq[idx]) + f:write('\n') +end +f:close() diff --git a/SpeechT5/Speech2S/speech2s/scripts/convert_model.lua b/SpeechT5/Speech2S/speech2s/scripts/convert_model.lua new file mode 100644 index 0000000000000000000000000000000000000000..61b92139294fb90a25989ebd2ee52a765fb278a2 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/convert_model.lua @@ -0,0 +1,108 @@ +-- Copyright (c) Facebook, Inc. and its affiliates. +-- +-- This source code is licensed under the MIT license found in the +-- LICENSE file in the root directory of this source tree. +-- +-- Usage: convert_model.lua +require 'torch' +local fairseq = require 'fairseq' + +model = torch.load(arg[1]) + +function find_weight_norm(container, module) + for _, wn in ipairs(container:listModules()) do + if torch.type(wn) == 'nn.WeightNorm' and wn.modules[1] == module then + return wn + end + end +end + +function push_state(dict, key, module) + if torch.type(module) == 'nn.Linear' then + local wn = find_weight_norm(model.module, module) + assert(wn) + dict[key .. '.weight_v'] = wn.v:float() + dict[key .. 
'.weight_g'] = wn.g:float() + elseif torch.type(module) == 'nn.TemporalConvolutionTBC' then + local wn = find_weight_norm(model.module, module) + assert(wn) + local v = wn.v:float():view(wn.viewOut):transpose(2, 3) + dict[key .. '.weight_v'] = v + dict[key .. '.weight_g'] = wn.g:float():view(module.weight:size(3), 1, 1) + else + dict[key .. '.weight'] = module.weight:float() + end + if module.bias then + dict[key .. '.bias'] = module.bias:float() + end +end + +encoder_dict = {} +decoder_dict = {} +combined_dict = {} + +function encoder_state(encoder) + luts = encoder:findModules('nn.LookupTable') + push_state(encoder_dict, 'embed_tokens', luts[1]) + push_state(encoder_dict, 'embed_positions', luts[2]) + + fcs = encoder:findModules('nn.Linear') + assert(#fcs >= 2) + local nInputPlane = fcs[1].weight:size(1) + push_state(encoder_dict, 'fc1', table.remove(fcs, 1)) + push_state(encoder_dict, 'fc2', table.remove(fcs, #fcs)) + + for i, module in ipairs(encoder:findModules('nn.TemporalConvolutionTBC')) do + push_state(encoder_dict, 'convolutions.' .. tostring(i - 1), module) + if nInputPlane ~= module.weight:size(3) / 2 then + push_state(encoder_dict, 'projections.' .. tostring(i - 1), table.remove(fcs, 1)) + end + nInputPlane = module.weight:size(3) / 2 + end + assert(#fcs == 0) +end + +function decoder_state(decoder) + luts = decoder:findModules('nn.LookupTable') + push_state(decoder_dict, 'embed_tokens', luts[1]) + push_state(decoder_dict, 'embed_positions', luts[2]) + + fcs = decoder:findModules('nn.Linear') + local nInputPlane = fcs[1].weight:size(1) + push_state(decoder_dict, 'fc1', table.remove(fcs, 1)) + push_state(decoder_dict, 'fc2', fcs[#fcs - 1]) + push_state(decoder_dict, 'fc3', fcs[#fcs]) + + table.remove(fcs, #fcs) + table.remove(fcs, #fcs) + + for i, module in ipairs(decoder:findModules('nn.TemporalConvolutionTBC')) do + if nInputPlane ~= module.weight:size(3) / 2 then + push_state(decoder_dict, 'projections.' .. tostring(i - 1), table.remove(fcs, 1)) + end + nInputPlane = module.weight:size(3) / 2 + + local prefix = 'attention.' .. tostring(i - 1) + push_state(decoder_dict, prefix .. '.in_projection', table.remove(fcs, 1)) + push_state(decoder_dict, prefix .. '.out_projection', table.remove(fcs, 1)) + push_state(decoder_dict, 'convolutions.' .. tostring(i - 1), module) + end + assert(#fcs == 0) +end + + +_encoder = model.module.modules[2] +_decoder = model.module.modules[3] + +encoder_state(_encoder) +decoder_state(_decoder) + +for k, v in pairs(encoder_dict) do + combined_dict['encoder.' .. k] = v +end +for k, v in pairs(decoder_dict) do + combined_dict['decoder.' .. k] = v +end + + +torch.save('state_dict.t7', combined_dict) diff --git a/SpeechT5/Speech2S/speech2s/scripts/count_docs.py b/SpeechT5/Speech2S/speech2s/scripts/count_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..58d85af85e91377a34dbd01f7674436152fd08e8 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/count_docs.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Count the number of documents and average number of lines and tokens per +document in a large file. Documents should be separated by a single empty line. 
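+
+Example (illustrative): python count_docs.py corpus.txt.gz --gzip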
+""" + +import argparse +import gzip +import sys + +import numpy as np + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("input") + parser.add_argument("--gzip", action="store_true") + args = parser.parse_args() + + def gopen(): + if args.gzip: + return gzip.open(args.input, "r") + else: + return open(args.input, "r", encoding="utf-8") + + num_lines = [] + num_toks = [] + with gopen() as h: + num_docs = 1 + num_lines_in_doc = 0 + num_toks_in_doc = 0 + for i, line in enumerate(h): + if len(line.strip()) == 0: # empty line indicates new document + num_docs += 1 + num_lines.append(num_lines_in_doc) + num_toks.append(num_toks_in_doc) + num_lines_in_doc = 0 + num_toks_in_doc = 0 + else: + num_lines_in_doc += 1 + num_toks_in_doc += len(line.rstrip().split()) + if i % 1000000 == 0: + print(i, file=sys.stderr, end="", flush=True) + elif i % 100000 == 0: + print(".", file=sys.stderr, end="", flush=True) + print(file=sys.stderr, flush=True) + + print("found {} docs".format(num_docs)) + print("average num lines per doc: {}".format(np.mean(num_lines))) + print("average num toks per doc: {}".format(np.mean(num_toks))) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/read_binarized.py b/SpeechT5/Speech2S/speech2s/scripts/read_binarized.py new file mode 100644 index 0000000000000000000000000000000000000000..a414095d03fb022a6753e816fc8bfd80e11db24d --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/read_binarized.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse + +from fairseq.data import Dictionary, data_utils, indexed_dataset + + +def get_parser(): + parser = argparse.ArgumentParser( + description="writes text from binarized file to stdout" + ) + # fmt: off + parser.add_argument('--dataset-impl', help='dataset implementation', + choices=indexed_dataset.get_available_dataset_impl()) + parser.add_argument('--dict', metavar='FP', help='dictionary containing known words', default=None) + parser.add_argument('--input', metavar='FP', required=True, help='binarized file to read') + # fmt: on + + return parser + + +def main(): + parser = get_parser() + args = parser.parse_args() + + dictionary = Dictionary.load(args.dict) if args.dict is not None else None + dataset = data_utils.load_indexed_dataset( + args.input, + dictionary, + dataset_impl=args.dataset_impl, + default="lazy", + ) + + for tensor_line in dataset: + if dictionary is None: + line = " ".join([str(int(x)) for x in tensor_line]) + else: + line = dictionary.string(tensor_line) + + print(line) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/rm_pt.py b/SpeechT5/Speech2S/speech2s/scripts/rm_pt.py new file mode 100644 index 0000000000000000000000000000000000000000..6cd063d21f0610fa7c42c2cfb2ee8af7c9c78677 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/rm_pt.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import os +import re +import shutil +import sys + + +pt_regexp = re.compile(r"checkpoint(\d+|_\d+_\d+|_[a-z]+)\.pt") +pt_regexp_epoch_based = re.compile(r"checkpoint(\d+)\.pt") +pt_regexp_update_based = re.compile(r"checkpoint_\d+_(\d+)\.pt") + + +def parse_checkpoints(files): + entries = [] + for f in files: + m = pt_regexp_epoch_based.fullmatch(f) + if m is not None: + entries.append((int(m.group(1)), m.group(0))) + else: + m = pt_regexp_update_based.fullmatch(f) + if m is not None: + entries.append((int(m.group(1)), m.group(0))) + return entries + + +def last_n_checkpoints(files, n): + entries = parse_checkpoints(files) + return [x[1] for x in sorted(entries, reverse=True)[:n]] + + +def every_n_checkpoints(files, n): + entries = parse_checkpoints(files) + return [x[1] for x in sorted(sorted(entries)[::-n])] + + +def main(): + parser = argparse.ArgumentParser( + description=( + "Recursively delete checkpoint files from `root_dir`, " + "but preserve checkpoint_best.pt and checkpoint_last.pt" + ) + ) + parser.add_argument("root_dirs", nargs="*") + parser.add_argument( + "--save-last", type=int, default=0, help="number of last checkpoints to save" + ) + parser.add_argument( + "--save-every", type=int, default=0, help="interval of checkpoints to save" + ) + parser.add_argument( + "--preserve-test", + action="store_true", + help="preserve checkpoints in dirs that start with test_ prefix (default: delete them)", + ) + parser.add_argument( + "--delete-best", action="store_true", help="delete checkpoint_best.pt" + ) + parser.add_argument( + "--delete-last", action="store_true", help="delete checkpoint_last.pt" + ) + parser.add_argument( + "--no-dereference", action="store_true", help="don't dereference symlinks" + ) + args = parser.parse_args() + + files_to_desymlink = [] + files_to_preserve = [] + files_to_delete = [] + for root_dir in args.root_dirs: + for root, _subdirs, files in os.walk(root_dir): + if args.save_last > 0: + to_save = last_n_checkpoints(files, args.save_last) + else: + to_save = [] + if args.save_every > 0: + to_save += every_n_checkpoints(files, args.save_every) + for file in files: + if not pt_regexp.fullmatch(file): + continue + full_path = os.path.join(root, file) + if ( + not os.path.basename(root).startswith("test_") or args.preserve_test + ) and ( + (file == "checkpoint_last.pt" and not args.delete_last) + or (file == "checkpoint_best.pt" and not args.delete_best) + or file in to_save + ): + if os.path.islink(full_path) and not args.no_dereference: + files_to_desymlink.append(full_path) + else: + files_to_preserve.append(full_path) + else: + files_to_delete.append(full_path) + + if len(files_to_desymlink) == 0 and len(files_to_delete) == 0: + print("Nothing to do.") + sys.exit(0) + + files_to_desymlink = sorted(files_to_desymlink) + files_to_preserve = sorted(files_to_preserve) + files_to_delete = sorted(files_to_delete) + + print("Operations to perform (in order):") + if len(files_to_desymlink) > 0: + for file in files_to_desymlink: + print(" - preserve (and dereference symlink): " + file) + if len(files_to_preserve) > 0: + for file in files_to_preserve: + print(" - preserve: " + file) + if len(files_to_delete) > 0: + for file in files_to_delete: + print(" - delete: " + file) + while True: + resp = input("Continue? 
(Y/N): ") + if resp.strip().lower() == "y": + break + elif resp.strip().lower() == "n": + sys.exit(0) + + print("Executing...") + if len(files_to_desymlink) > 0: + for file in files_to_desymlink: + realpath = os.path.realpath(file) + print("rm " + file) + os.remove(file) + print("cp {} {}".format(realpath, file)) + shutil.copyfile(realpath, file) + if len(files_to_delete) > 0: + for file in files_to_delete: + print("rm " + file) + os.remove(file) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/sacrebleu.sh b/SpeechT5/Speech2S/speech2s/scripts/sacrebleu.sh new file mode 100644 index 0000000000000000000000000000000000000000..c10bf2b76ea032deabab6f5c9d8a3e1e884f1642 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/sacrebleu.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +if [ $# -ne 4 ]; then + echo "usage: $0 TESTSET SRCLANG TGTLANG GEN" + exit 1 +fi + +TESTSET=$1 +SRCLANG=$2 +TGTLANG=$3 + +GEN=$4 + +if ! command -v sacremoses &> /dev/null +then + echo "sacremoses could not be found, please install with: pip install sacremoses" + exit +fi + +grep ^H $GEN \ +| sed 's/^H\-//' \ +| sort -n -k 1 \ +| cut -f 3 \ +| sacremoses detokenize \ +> $GEN.sorted.detok + +sacrebleu --test-set $TESTSET --language-pair "${SRCLANG}-${TGTLANG}" < $GEN.sorted.detok diff --git a/SpeechT5/Speech2S/speech2s/scripts/shard_docs.py b/SpeechT5/Speech2S/speech2s/scripts/shard_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..97232c3c845ee01dc5ab627388934cc0f9588280 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/shard_docs.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Split a large file into shards while respecting document boundaries. Documents +should be separated by a single empty line. +""" + +import argparse +import contextlib + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("input") + parser.add_argument("--num-shards", type=int) + args = parser.parse_args() + + assert args.num_shards is not None and args.num_shards > 1 + + with open(args.input, "r", encoding="utf-8") as h: + with contextlib.ExitStack() as stack: + outputs = [ + stack.enter_context( + open(args.input + ".shard" + str(i), "w", encoding="utf-8") + ) + for i in range(args.num_shards) + ] + + doc = [] + first_doc = [True] * args.num_shards + + def output_doc(i): + if not first_doc[i]: + outputs[i].write("\n") + first_doc[i] = False + for line in doc: + outputs[i].write(line) + doc.clear() + + num_docs = 0 + for line in h: + if line.strip() == "": # empty line indicates new document + output_doc(num_docs % args.num_shards) + num_docs += 1 + else: + doc.append(line) + output_doc(num_docs % args.num_shards) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/split_train_valid_docs.py b/SpeechT5/Speech2S/speech2s/scripts/split_train_valid_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..ff159785284a13b44626b207d84430c592acaf8f --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/split_train_valid_docs.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +""" +Split a large file into a train and valid set while respecting document +boundaries. 
Documents should be separated by a single empty line. +""" + +import argparse +import random +import sys + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("input") + parser.add_argument("sample_output", help="train output file") + parser.add_argument("remainder_output", help="valid output file") + parser.add_argument("-k", type=int, help="remainder size") + parser.add_argument( + "--lines", action="store_true", help="split lines instead of docs" + ) + args = parser.parse_args() + + assert args.k is not None + + sample = [] + remainder = [] + num_docs = [0] + + def update_sample(doc): + if len(sample) < args.k: + sample.append(doc.copy()) + else: + i = num_docs[0] + j = random.randrange(i + 1) + if j < args.k: + remainder.append(sample[j]) + sample[j] = doc.copy() + else: + remainder.append(doc.copy()) + num_docs[0] += 1 + doc.clear() + + with open(args.input, "r", encoding="utf-8") as h: + doc = [] + for i, line in enumerate(h): + if line.strip() == "": # empty line indicates new document + update_sample(doc) + else: + doc.append(line) + if args.lines: + update_sample(doc) + if i % 1000000 == 0: + print(i, file=sys.stderr, end="", flush=True) + elif i % 100000 == 0: + print(".", file=sys.stderr, end="", flush=True) + if len(doc) > 0: + update_sample(doc) + print(file=sys.stderr, flush=True) + + assert len(sample) == args.k + + with open(args.sample_output, "w", encoding="utf-8") as out: + first = True + for doc in sample: + if not first and not args.lines: + out.write("\n") + first = False + for line in doc: + out.write(line) + + with open(args.remainder_output, "w", encoding="utf-8") as out: + first = True + for doc in remainder: + if not first and not args.lines: + out.write("\n") + first = False + for line in doc: + out.write(line) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/spm_decode.py b/SpeechT5/Speech2S/speech2s/scripts/spm_decode.py new file mode 100644 index 0000000000000000000000000000000000000000..7d7b68b240265924601ca6a738ed3d7b4b8e9cda --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/spm_decode.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
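+
+# Decodes SentencePiece pieces or ids back to plain text, one sentence per line.
+# Example (illustrative): python spm_decode.py --model spm.model --input hyp.txt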
+ +from __future__ import absolute_import, division, print_function, unicode_literals + +import argparse + +import sentencepiece as spm + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="sentencepiece model to use for decoding" + ) + parser.add_argument("--input", required=True, help="input file to decode") + parser.add_argument("--input_format", choices=["piece", "id"], default="piece") + args = parser.parse_args() + + sp = spm.SentencePieceProcessor() + sp.Load(args.model) + + if args.input_format == "piece": + + def decode(input): + return "".join(sp.DecodePieces(input)) + + elif args.input_format == "id": + + def decode(input): + return "".join(sp.DecodeIds(input)) + + else: + raise NotImplementedError + + def tok2int(tok): + # remap reference-side (represented as <>) to 0 + return int(tok) if tok != "<>" else 0 + + with open(args.input, "r", encoding="utf-8") as h: + for line in h: + if args.input_format == "id": + print(decode(list(map(tok2int, line.rstrip().split())))) + elif args.input_format == "piece": + print(decode(line.rstrip().split())) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/spm_encode.py b/SpeechT5/Speech2S/speech2s/scripts/spm_encode.py new file mode 100644 index 0000000000000000000000000000000000000000..f91e0bb728a33448c1415aee6036ac9d0feac11f --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/spm_encode.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import absolute_import, division, print_function, unicode_literals + +import argparse +import contextlib +import sys + +import sentencepiece as spm + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="sentencepiece model to use for encoding" + ) + parser.add_argument( + "--inputs", nargs="+", default=["-"], help="input files to filter/encode" + ) + parser.add_argument( + "--outputs", nargs="+", default=["-"], help="path to save encoded outputs" + ) + parser.add_argument("--output_format", choices=["piece", "id"], default="piece") + parser.add_argument( + "--min-len", + type=int, + metavar="N", + help="filter sentence pairs with fewer than N tokens", + ) + parser.add_argument( + "--max-len", + type=int, + metavar="N", + help="filter sentence pairs with more than N tokens", + ) + args = parser.parse_args() + + assert len(args.inputs) == len( + args.outputs + ), "number of input and output paths should match" + + sp = spm.SentencePieceProcessor() + sp.Load(args.model) + + if args.output_format == "piece": + + def encode(input): + return sp.EncodeAsPieces(input) + + elif args.output_format == "id": + + def encode(input): + return list(map(str, sp.EncodeAsIds(input))) + + else: + raise NotImplementedError + + if args.min_len is not None or args.max_len is not None: + + def valid(line): + return (args.min_len is None or len(line) >= args.min_len) and ( + args.max_len is None or len(line) <= args.max_len + ) + + else: + + def valid(lines): + return True + + with contextlib.ExitStack() as stack: + inputs = [ + stack.enter_context(open(input, "r", encoding="utf-8")) + if input != "-" + else sys.stdin + for input in args.inputs + ] + outputs = [ + stack.enter_context(open(output, "w", encoding="utf-8")) + if output != "-" + else sys.stdout + for output in 
args.outputs + ] + + stats = { + "num_empty": 0, + "num_filtered": 0, + } + + def encode_line(line): + line = line.strip() + if len(line) > 0: + line = encode(line) + if valid(line): + return line + else: + stats["num_filtered"] += 1 + else: + stats["num_empty"] += 1 + return None + + for i, lines in enumerate(zip(*inputs), start=1): + enc_lines = list(map(encode_line, lines)) + if not any(enc_line is None for enc_line in enc_lines): + for enc_line, output_h in zip(enc_lines, outputs): + print(" ".join(enc_line), file=output_h) + if i % 10000 == 0: + print("processed {} lines".format(i), file=sys.stderr) + + print("skipped {} empty lines".format(stats["num_empty"]), file=sys.stderr) + print("filtered {} lines".format(stats["num_filtered"]), file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/scripts/spm_train.py b/SpeechT5/Speech2S/speech2s/scripts/spm_train.py new file mode 100644 index 0000000000000000000000000000000000000000..9db668fd4166a860198784990de68ea26157995d --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/spm_train.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys + +import sentencepiece as spm + + +if __name__ == "__main__": + spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) diff --git a/SpeechT5/Speech2S/speech2s/scripts/test_fsdp.sh b/SpeechT5/Speech2S/speech2s/scripts/test_fsdp.sh new file mode 100644 index 0000000000000000000000000000000000000000..1f428a035e4474427ded991f8e8307ea59f61f69 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/scripts/test_fsdp.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +rm -rf fsdp_dummy +mkdir -p fsdp_dummy +CUDA_VISIBLE_DEVICES=0,1,2,3 fairseq-train /private/home/sshleifer/data-bin/stories_mmap \ + --ddp-backend fully_sharded --fp16 --fp16-init-scale 4 \ + --cpu-offload --checkpoint-activations \ + --task language_modeling --tokens-per-sample 256 --batch-size 8 \ + --arch transformer_lm_gpt2_tiny \ + --optimizer cpu_adam --adam-betas "(0.9,0.98)" \ + --lr 0.0001 --lr-scheduler polynomial_decay --warmup-updates 5 --total-num-update 10 \ + --max-update 5 --log-format json --log-interval 1 \ + --save-interval-updates 5 --save-dir fsdp_dummy --disable-validation \ + --restore-file x.pt "$@" + +# Now we try to load the checkpoint +CUDA_VISIBLE_DEVICES=0,1 fairseq-train /private/home/sshleifer/data-bin/stories_mmap \ + --ddp-backend fully_sharded --fp16 --fp16-init-scale 4 \ + --cpu-offload --checkpoint-activations \ + --task language_modeling --tokens-per-sample 256 --batch-size 8 \ + --arch transformer_lm_gpt2_tiny \ + --optimizer cpu_adam --adam-betas "(0.9,0.98)" \ + --lr 0.0001 --lr-scheduler polynomial_decay --warmup-updates 5 --total-num-update 10 \ + --max-update 2 --log-format json --log-interval 1 \ + --save-interval-updates 2 --save-dir fsdp_dummy diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/base_sc2c_enes.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/base_sc2c_enes.sh new file mode 100644 index 0000000000000000000000000000000000000000..08e00403f961625ec2c819f5ee85a2ce74e64e9a --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/base_sc2c_enes.sh @@ -0,0 +1,64 @@ + +# #################################### +# Hubert SCT2T ED model # +# 
#################################### + +world_size=$1 +update_freq=$2 +exp_name=$3 +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=1 +[ -z $exp_name ] && exp_name=sc2t_base_enes_${world_size}gpu_${update_freq}accum6666 + + +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku +CONFIG_DIR=/mnt/output/users/v-kunwei/code/stpretrain_scripts/config +DATA_DIR="/mnt/output/users/v-kunwei/data/s2s_data/speech_enes" +TEXT_DATA_DIR="/mnt/output/users/v-kunwei/data/s2s_data/text_enes/bin-idx" +MODEL_DIR="/mnt/output/v-kunwei/data/s2s_data/exp/S2S_enes/$exp_name" + +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + + +python $FAIRSEQ_ROOT/fairseq_cli/hydra_train.py \ + --config-dir $CONFIG_DIR/pretrain \ + --config-name sc2t_base_librispeech \ + \ + +task.store_labels=true \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + +task.text_cfg.data_config=config.yaml \ + task.text_cfg.text_maxtokens_ratio=3.0 \ + \ + +criterion.dec_loss_type="ce" \ + \ + criterion.text_weight=1.0 \ + \ + model.use_rel_pos_enc=true \ + +model.code_use_rel_pos_enc=true \ + +model.pad_with_code=true \ + model.text_transformer.no_scale_embedding=true \ + model.text_transformer.layernorm_embedding=true \ + +model.share_decoder_input_output_embed=true \ + \ + dataset.train_subset=\"train_all+en.kmu-spm\" \ + dataset.valid_subset=\"valid+en_valid.kmu-spm\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1000000 \ + optimization.update_freq=[${update_freq}] \ + optimization.max_update=400000 \ + \ + distributed_training.distributed_world_size=${world_size} \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=${exp_name} + + +sleep 5m +echo "All finished" + diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/base_sc2c_esen.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/base_sc2c_esen.sh new file mode 100644 index 0000000000000000000000000000000000000000..2a15bd129b961e9c5eeff211f7c03f7f8fcc20c9 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/base_sc2c_esen.sh @@ -0,0 +1,64 @@ + +# #################################### +# Hubert SCT2T ED model # +# #################################### + +world_size=$1 +update_freq=$2 +exp_name=$3 +[ -z $world_size ] && world_size=24 +[ -z $update_freq ] && update_freq=3 +[ -z $exp_name ] && exp_name=sc2t_base_esen_${world_size}gpu_${update_freq}accum1 + + +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku +CONFIG_DIR=/mnt/output/users/v-kunwei/code/stpretrain_scripts/config +DATA_DIR="/mnt/output/users/v-kunwei/data/s2s_data/speech_esen" +TEXT_DATA_DIR="/mnt/output/users/v-kunwei/data/s2s_data/text_esen" +MODEL_DIR="/mnt/output/v-kunwei/data/s2s_data/exp/S2S_esen/$exp_name" + +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + + +python $FAIRSEQ_ROOT/fairseq_cli/hydra_train.py \ + --config-dir $CONFIG_DIR/pretrain \ + --config-name sc2t_base_librispeech \ + \ + +task.store_labels=true \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + +task.text_cfg.data_config=config.yaml \ + task.text_cfg.text_maxtokens_ratio=3.0 \ + \ + +criterion.dec_loss_type="ce" \ + \ + criterion.text_weight=1.0 \ + \ + model.use_rel_pos_enc=true \ + +model.code_use_rel_pos_enc=true \ + +model.pad_with_code=true \ + model.text_transformer.no_scale_embedding=true \ + 
model.text_transformer.layernorm_embedding=true \ + +model.share_decoder_input_output_embed=true \ + \ + dataset.train_subset=\"train+en.kmu-spm\" \ + dataset.valid_subset=\"valid+en_valid.kmu-spm\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1000000 \ + optimization.update_freq=[${update_freq}] \ + optimization.max_update=400000 \ + \ + distributed_training.distributed_world_size=${world_size} \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=${exp_name} + + +sleep 5m +echo "All finished" + diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config.yaml b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58ba896d1a38a7ac980d213d818b1d2e427c9eb6 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config.yaml @@ -0,0 +1,4 @@ +audio_root: ./ +standardize_audio: true +use_audio_input: true +vocab_filename: dict.txt diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/finetune_asr/base_100h.yaml b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/finetune_asr/base_100h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c9fae8e626ccb3d209334d754ff6823b40c2c4e --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/finetune_asr/base_100h.yaml @@ -0,0 +1,101 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 1 + keep_last_epochs: 5 + keep_best_checkpoints: 5 + best_checkpoint_metric: wer + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 1 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: hubert_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: false # must be consistent with pre-training + labels: ["ltr"] + single_target: true + add_decoder: false + pad_audio: false + random_crop: true + tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 1200000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_100 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: label_smoothed_cross_entropy + #zero_infinity: true + + +optimization: + max_update: 80000 + lr: [0.00003] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: hubert_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + decoder_layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + add_decoder: false + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
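+      # The ??? dirs are mandatory OmegaConf values and must be supplied at
+      # launch time, e.g. hydra.run.dir=/path/to/exp (illustrative override).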
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/finetune_asr/large_960h.yaml b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/finetune_asr/large_960h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..360182329dd245e1d2f8d10f412654fc5ba2afb3 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/finetune_asr/large_960h.yaml @@ -0,0 +1,98 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + +checkpoint: + save_interval: 1 + keep_last_epochs: 10 + keep_best_checkpoints: 5 + best_checkpoint_metric: wer + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 24 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: hubert_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: true # must be consistent with pre-training + labels: ["ltr"] + single_target: true + add_decoder: false + pad_audio: false + random_crop: true + tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 200000 + lr: [0.00003] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: hubert_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.0 + decoder_layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + add_decoder: false + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/pretrain/mbart.yaml b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/pretrain/mbart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51025f2f8ec584a888a4e07c8c246829351af948 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/pretrain/mbart.yaml @@ -0,0 +1,120 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 4 + keep_last_epochs: 4 + save_interval_updates: 20000 + keep_interval_updates: -1 + keep_interval_updates_pattern: 50000 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 8 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: denoising + data: ??? 
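+  # `data` should point to the binarized text corpus consumed by fairseq's `denoising`
+  # task; `mask` below is the fraction of tokens masked for mBART-style denoising.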
+ mask: 0.15 + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 1 + +criterion: + _name: sc2t + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + label_smoothing: 0.1 + text_weight: 0.1 + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: stbert + label_rate: ??? + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layers: 6 + encoder_attention_heads: 8 + decoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + add_code_encoder: true + add_adaptor: false + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + adaptive_input: ${model.adaptive_input} + max_source_positions: 3000 + checkpoint_activations: ${model.checkpoint_activations} + no_scale_embedding: false + layernorm_embedding: false + quant_noise: + pq: ${model.quant_noise_pq} + encoder: + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 8 + normalize_before: false + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/pretrain/sc2t_base_librispeech.yaml b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/pretrain/sc2t_base_librispeech.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0cd16561c9d4715d21824cbbc7271940d3ceeda7 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/pretrain/sc2t_base_librispeech.yaml @@ -0,0 +1,137 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 4 + keep_last_epochs: 4 + save_interval_updates: 20000 + keep_interval_updates: -1 + keep_interval_updates_pattern: 50000 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 8 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: false # must be consistent with extractor + add_decoder: true + text_cfg: + seed: ${common.seed} + text_data: ??? 
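+    # `text_data` is supplied by the pre-training scripts via
+    # `task.text_cfg.text_data=$TEXT_DATA_DIR`. `text_maxtokens_ratio` (set below, and
+    # overridden to 3.0 in the enes/esen launch scripts) scales the text-stream token
+    # budget relative to the speech-stream `dataset.max_tokens`.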
+ sample_break_mode: eos + tokens_per_sample: 1024 + shorten_method: "random_crop" + text_maxtokens_ratio: 1.0 + + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 1 + +criterion: + _name: sc2t + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + label_smoothing: 0.1 + text_weight: 0.1 + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: stbert + label_rate: ??? + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layers: 6 + encoder_attention_heads: 8 + decoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + add_code_encoder: true + add_adaptor: false + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + adaptive_input: ${model.adaptive_input} + max_source_positions: 3000 + checkpoint_activations: ${model.checkpoint_activations} + no_scale_embedding: false + layernorm_embedding: false + quant_noise: + pq: ${model.quant_noise_pq} + encoder: + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 8 + normalize_before: false + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? 
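+    # The sweep sub-directory name is derived from the command-line overrides via
+    # `override_dirname`; keys listed under `exclude_keys` (run, task.data,
+    # task.label_dir) are left out so the generated path stays short.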
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/translation/text2code.yaml b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/translation/text2code.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bed25135e0da21c20d33475ad33437c63e6703d7 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config/translation/text2code.yaml @@ -0,0 +1,81 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 1000000 + keep_last_epochs: 5 + save_interval_updates: 1000 + keep_interval_updates_pattern: 10000 + keep_interval_updates: 5 + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 1 + nprocs_per_node: 8 + + +criterion: + _name: "label_smoothed_cross_entropy" + + +task: + _name: "translation_from_jst" + +dataset: + num_workers: 0 + max_tokens: 4096 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + train_subset: train_clean_100 + valid_subset: dev_clean + required_batch_size_multiple: 1 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: hubert_t2c + w2v_path: ??? + layerdrop: 0.1 + decoder_layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config_mbart.yaml b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config_mbart.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51025f2f8ec584a888a4e07c8c246829351af948 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/config_mbart.yaml @@ -0,0 +1,120 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 4 + keep_last_epochs: 4 + save_interval_updates: 20000 + keep_interval_updates: -1 + keep_interval_updates_pattern: 50000 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 8 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: denoising + data: ??? 
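+  # Note: config_mbart.yaml is byte-identical to config/pretrain/mbart.yaml (same blob
+  # hash in this diff); it is presumably kept here as a convenience copy next to the
+  # launch scripts.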
+ mask: 0.15 + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 1 + +criterion: + _name: sc2t + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + label_smoothing: 0.1 + text_weight: 0.1 + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: stbert + label_rate: ??? + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layers: 6 + encoder_attention_heads: 8 + decoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + add_code_encoder: true + add_adaptor: false + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + adaptive_input: ${model.adaptive_input} + max_source_positions: 3000 + checkpoint_activations: ${model.checkpoint_activations} + no_scale_embedding: false + layernorm_embedding: false + quant_noise: + pq: ${model.quant_noise_pq} + encoder: + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 8 + normalize_before: false + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/extract_hubert_feature_itp.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/extract_hubert_feature_itp.sh new file mode 100644 index 0000000000000000000000000000000000000000..52929896c612957d7fc8df452015411b0e6038bc --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/extract_hubert_feature_itp.sh @@ -0,0 +1,41 @@ + +if [ ! 
-d ${HOME}/azcopy_linux_amd64_10.11.0 ]; then + CURRENT_DIR=`pwd` + cd ${HOME} && wget https://azcopyvnext.azureedge.net/release20210616/azcopy_linux_amd64_10.11.0.tar.gz && tar -zxvf azcopy_linux_amd64_10.11.0.tar.gz && rm -f azcopy_linux_amd64_10.11.0.tar.gz && cd ${CURRENT_DIR} +fi +export PATH=$PATH:${HOME}/azcopy_linux_amd64_10.11.0/:${HOME}/.local/bin +export PYTHONPATH=$PYTHONPATH:/mnt/output/users/v-kunwei/code/fairseq + +rank=$1 +nshard=$2 +split=$3 +[ -z $rank ] && echo "please specify rank" +[ -z $nshard ] && nshard=1 +[ -z $split ] && split="train" + + +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq +ckpt_path=/mnt/output/users/v-kunwei/code/fairseq/examples/speech_to_speech/mhubert_base_vp_en_es_fr_it3.pt +tsv_dir=/home/v-kunwei + +feat_dir=${HOME}/$split +python $FAIRSEQ_ROOT/examples/hubert/simple_kmeans/dump_hubert_feature.py ${tsv_dir} ${split} ${ckpt_path} 9 ${nshard} ${rank} ${feat_dir} || exit 1 + + +echo "-------------------------------------------------------------------------------------------" +echo "---------------------------------- done ---------------------------------------------" +echo "-------------------------------------------------------------------------------------------" + +km_path=/mnt/output/users/v-kunwei/code/fairseq/examples/speech_to_speech/mhubert_base_vp_en_es_fr_it3_L11_km1000.bin +lab_dir=${HOME}/${split} +python $FAIRSEQ_ROOT/examples/hubert/simple_kmeans/dump_km_label.py ${feat_dir} ${split} ${km_path} ${nshard} ${rank} ${lab_dir} + + +# sas="?sv=2020-08-04&st=2022-01-02T04%3A58%3A15Z&se=2022-06-01T04%3A58%3A00Z&sr=c&sp=racwdl&sig=NyZKOHivgesEoZ8yvLsVT6aZMYQZMevLLmXNOTaWyvU%3D" +# blob="https://msranlcmtteamdrive.blob.core.windows.net/teamdrive/v-ziqzhang/data/stbert/data/librispeech/libri_960/hubert_release_iter2_layer9_kmeans/${split}" +# azcopy copy $feat_dir/${split}_${rank}_${nshard}.len "$blob/$sas" +# azcopy copy $feat_dir/${split}_${rank}_${nshard}.npy "$blob/$sas" +# azcopy copy $lab_dir "$blob/$sas" --recursive + + + diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/merge_code.py b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/merge_code.py new file mode 100644 index 0000000000000000000000000000000000000000..a02ba3e3058b75e2e603d7470e9ef93beebabcfa --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/merge_code.py @@ -0,0 +1,14 @@ +import sys +import torch + + +def main(): + for line in sys.stdin: + line = line.rstrip() + codes = list(map(int, line.split())) + merged_codes = torch.unique_consecutive(torch.tensor(codes)).numpy() + merged_codes = map(str, merged_codes) + print(" ".join(merged_codes)) + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/txt2idx.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/txt2idx.sh new file mode 100644 index 0000000000000000000000000000000000000000..466f8a3ef8debba9c9f5a76cfb02d1e25217c6b4 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/txt2idx.sh @@ -0,0 +1,43 @@ +[ $# -lt 3 ] && echo "Usage: $0 " && exit 0 + +if [ ! -d ${HOME}/sentencepiece ]; then + CURRENT_DIR=`pwd` + cd ${HOME} + git clone https://github.com/google/sentencepiece.git + cd sentencepiece + mkdir build && cd build + cmake .. 
&& make -j 16 + sudo make install + sudo ldconfig -v + cd ${HOME} + cd ${CURRENT_DIR} +fi + +input=$1 +outdir=$2 +DICT=$3 +suffix=$4 +outname=${input##*/} +outname=${outname%.txt*} +[ -z $input ] && echo "You must specify a source file" && exit 1 + +[ -z $DICT ] && echo "No dict was specified!" && exit 1 +[ -z $outdir ] && outdir=${input%/*} +[ -z $outdir ] && outdir="." +[ ! -d $outdir ] && mkdir -p $outdir + +echo "Dict : $DICT" +echo "------------------------------- creating idx/bin--------------------------------------------" +echo "$input --> $outdir/${outname}${suffix}.idx" +fairseq-preprocess \ + --only-source \ + --trainpref $input \ + --destdir $outdir \ + --thresholdsrc 0 \ + --srcdict ${DICT} \ + --workers 40 + +mv $outdir/train.idx $outdir/${outname}${suffix}.idx +mv $outdir/train.bin $outdir/${outname}${suffix}.bin +echo "----------------------------------- done --------------------------------------------" + diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/txt2spm.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/txt2spm.sh new file mode 100644 index 0000000000000000000000000000000000000000..6baf72227b4013512af8a6724d2bff2156a47078 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/txt2spm.sh @@ -0,0 +1,33 @@ +[ $# -lt 2 ] && echo "Usage: $0 " && exit 0 + +if [ ! -d ${HOME}/sentencepiece ]; then + CURRENT_DIR=`pwd` + cd ${HOME} + git clone https://github.com/google/sentencepiece.git + cd sentencepiece + mkdir build && cd build + cmake .. && make -j 16 + sudo make install + sudo ldconfig -v + cd ${HOME} + cd ${CURRENT_DIR} +fi + +input=$1 +outdir=$2 +MODEL=$3 +suffix=$4 +outname=${input##*/} +outname=${outname%.wrd*} +[ -z $input ] && echo "You must specify a source file" && exit 1 + +[ -z $MODEL ] && MODEL=/mnt/default/v-ziqzhang/data/stbert/data/librispeech/hubert_release_iter2_layer9_kmeans/spm_unigram_10000.model && echo "No spm model was specified!, set default to $MODEL" +[ -z $outdir ] && outdir=${input%/*} +[ -z $outdir ] && outdir="." +[ ! 
-d $outdir ] && mkdir -p $outdir + +echo "Output: $outdir/$outname.spm" + +echo "------------------------------- tokenize text...--------------------------------------------" +spm_encode --model=$MODEL < ${input} > $outdir/$outname.spm || exit 1 +echo "----------------------------------- done --------------------------------------------" diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/wmt/normalize_en_text.py b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/wmt/normalize_en_text.py new file mode 100644 index 0000000000000000000000000000000000000000..83e332575ba317ded70c4095eeebbc5ec588b965 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/wmt/normalize_en_text.py @@ -0,0 +1,46 @@ +import re +import sys +import regex +import argparse +from tqdm import tqdm +from num2words import num2words + +def writefile(filename, lines): + with open(filename, 'w', encoding='utf-8') as f: + f.writelines(lines) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--input", "-i", required=True, type=str) + parser.add_argument("--output", "-o", required=True, type=str) + args = parser.parse_args() + outlines = [] + + with open(f"{args.input}", 'r') as f: + inputs = f.readlines() + + for line in tqdm(inputs): + line = line.strip().upper() + line = re.sub(u"([^\u0041-\u005a\u0061-\u007a\u0030-\u0039\'])", " ", line) + items = [] + for item in line.split(): + if item.isdigit(): + try: + item = num2words(item) + except Exception as e: + print(line) + raise(e) + items.append(item) + line = " ".join(items) + line = line.replace("-", " ") + line = line.upper() + line = line.replace("' S", "'S") + line = line.replace(" ", "|") + line = " ".join(line) + " |" + outlines.append(line + '\n') + # print(line) + + writefile(args.output, outlines) + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/wmt/normalize_es_text.py b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/wmt/normalize_es_text.py new file mode 100644 index 0000000000000000000000000000000000000000..0136b534be0bf4fef1c84b51c83a7ac9ad437700 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/data_process/wmt/normalize_es_text.py @@ -0,0 +1,49 @@ +import re +import sys +import regex +import argparse +import re,string +from tqdm import tqdm +from num2words import num2words + +def writefile(filename, lines): + with open(filename, 'w', encoding='utf-8') as f: + f.writelines(lines) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--input", "-i", required=True, type=str) + parser.add_argument("--output", "-o", required=True, type=str) + args = parser.parse_args() + outlines = [] + + with open(f"{args.input}", 'r') as f: + inputs = f.readlines() + + for line in tqdm(inputs): + line = line.strip() + line = re.sub(u"([^\u0041-\u005a\u0061-\u007a\u0030-\u0039\u00d1\u00f1\'])", " ", line) + items = [] + punc='~`!#$%^&*()_+-=|\';":/.,?><~.' 
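+        # The regex above keeps only Latin letters, digits, Ñ/ñ and apostrophes; the loop
+        # below spells out digits with num2words(lang='es'), then punctuation is stripped,
+        # the text lowercased, and each line re-emitted as space-separated characters with
+        # '|' marking word boundaries (matching the letter-level "ltr" format used
+        # elsewhere in these scripts).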
+ for item in line.split(): + if item.isdigit(): + try: + item = num2words(item, lang='es') + except Exception as e: + print(line) + raise(e) + items.append(item) + line = " ".join(items) + line = (re.sub(r"[%s]+" %punc, "",line)) + line = line.replace("-", " ") + line = line.lower() + line = line.replace("' S", "'S") + line = line.replace(" ", "|") + line = " ".join(line) + " |" + outlines.append(line + '\n') + # print(line) + + writefile(args.output, outlines) + +if __name__ == "__main__": + main() diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/decode_text2code_beam2.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/decode_text2code_beam2.sh new file mode 100644 index 0000000000000000000000000000000000000000..c9dcc10425a3a519ec456c73d15f3339de2a0eba --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/decode_text2code_beam2.sh @@ -0,0 +1,50 @@ + +##################################### +# Hubert ED model # +##################################### +[ $# -lt 1 ] && echo "Usage: $0 " && exit 0 +#source /mnt/default/v-ziqzhang/.bashrc_sing + +model_path=$1 +gen_set=$2 +tgt=$3 +src="ltr" +max_tokens=$4 +word_size=$5 +rank=$6 +outdir=$7 + +[ -z $tgt ] && tgt="kmu" +[ -z $gen_set ] && gen_set="dev_clean" +[ -z $word_size ] && word_size=1 +[ -z $rank ] && rank=0 +[ -z $max_tokens ] && max_tokens=16000 + +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku +DATA_DIR=/home/v-kunwei/ +[ $gen_set == "test" ] && DATA_DIR=/mnt/output/users/v-kunwei/code/fairseq_mlstku +[ -z $outdir ] && outdir=$DATA_DIR + + +results_path=$outdir/pseudo_${gen_set}_${rank} +[ ! -d $results_path ] && mkdir -p $results_path + +for subset in $gen_set; do + python $FAIRSEQ_ROOT/fairseq_cli/generate_mt_label.py $DATA_DIR \ + --path ${model_path} \ + --task "translation_from_jst" \ + --max-target-positions 18000 \ + --gen-subset $subset \ + -t $tgt -s "ltr" \ + --dataset-impl "raw" \ + --max-tokens ${max_tokens} \ + --beam 2 \ + --max-len-a 3 --max-len-b 100 \ + --results-path $results_path \ + --distributed-world-size $word_size --distributed-rank $rank \ + + echo "$model" > $results_path/model.record + sleep 1s +done | tee $results_path/decode.log + +sleep 2s diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/eval2.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/eval2.sh new file mode 100644 index 0000000000000000000000000000000000000000..0736ef4e338c9837cafc61d3c903d4683d684ea9 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/eval2.sh @@ -0,0 +1,12 @@ +lmweight=0 +num_gpus=8 +python examples/speech_recognition/new/infer.py --config-dir /mnt/output/users/v-kunwei/code/fairseq/examples/speech_recognition/new/conf \ +--config-name infer task=audio_finetuning task.data=/home/v-kunwei common.user_dir=/mnt/output/users/v-kunwei/code/fairseq/examples/data2vec \ +task.labels=ltr decoding.type=viterbi \ +decoding.lexicon=models/es_eval/espeak_dict.txt \ +decoding.unique_wer_file=True \ +dataset.gen_subset=test \ +common_eval.path=/mnt/output/users/v-kunwei/code/fairseq/models/es_eval/espeak_26lang_m10.pt decoding.beam=1500 distributed_training.distributed_world_size=${num_gpus} \ +decoding.results_path=/home/v-kunwei + +#sclite -h "/home/v-kunwei/hypo.units" -r "/home/v-kunwei/ref.units" -i rm -o all stdout > "./result.txt" diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/eval3.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/eval3.sh new file mode 100644 index 0000000000000000000000000000000000000000..4a2354319ddc7a672506e92e7577d3dc978b47a8 --- /dev/null +++ 
b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/eval3.sh @@ -0,0 +1,4 @@ +#$subset=test +python examples/speech_recognition/infer.py /home/v-kunwei --task audio_finetuning \ +--nbest 1 --path /mnt/output/users/v-kunwei/code/fairseq/models/es_eval/espeak_26lang_m10.pt --gen-subset test --results-path /home/v-kunwei --criterion ctc --labels ltr --max-tokens 4000000 \ +--post-process letter diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/finetune_enes.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/finetune_enes.sh new file mode 100644 index 0000000000000000000000000000000000000000..eaae1476bc5f80640abee6a85bdd1f453c15d97a --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/finetune_enes.sh @@ -0,0 +1,85 @@ +# #################################### +# Hubert ED model # +# #################################### +#source /mnt/default/v-ziqzhang/.bashrc_sing + +[ $# -lt 4 ] && echo "Usage: $0 " && exit 0 +world_size=$1 +update_freq=$2 +w2v_path=$3 +cpt=$4 +Mount=$5 + +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=3 +[ -z $w2v_path ] && echo "you must specify a wav_path !" && exit 1 +[ -z $cpt ] && cpt=030.pt +[ -z $Mount ] && Mount=/mnt/default + + +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku +CONFIG_DIR=/mnt/output/users/v-kunwei/code/stpretrain_scripts/config +DATA_DIR="/mnt/output/users/v-kunwei/data/s2s_data/fin_enes100" + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="/mnt/output/users/v-kunwei/data/s2s_data/finetune/tune_ST_from_eneshu" +exp_name="tune_enes_lr5e-5_from_$cpt" +MODEL_DIR=$MODEL_DIR/$exp_name +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +max_tokens=490000 + +python $FAIRSEQ_ROOT/fairseq_cli/hydra_train.py \ + --config-dir $CONFIG_DIR/finetune_asr \ + --config-name base_100h \ + \ + +task.store_labels=true \ + task.labels='["spm"]' \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.add_decoder=true \ + +task.max_keep_size=490000 \ + \ + +model.reuse_text_emb=true \ + model._name="stbert_st" \ + model.w2v_path=${w2v_path} \ + model.add_decoder=true \ + \ + criterion._name="label_smoothed_cross_entropy" \ + +criterion.label_smoothing=0.2 \ + +criterion.report_accuracy=true \ + \ + lr_scheduler._name="polynomial_decay" \ + +lr_scheduler.warmup_updates=20000 \ + \ + optimization.lr=[0.0003] \ + optimization.max_update=100000 \ + checkpoint.best_checkpoint_metric="accuracy" \ + checkpoint.maximize_best_checkpoint_metric=true \ + checkpoint.save_interval=1 \ + \ + dataset.train_subset="train" \ + dataset.valid_subset="valid" \ + dataset.max_tokens=$max_tokens \ + optimization.update_freq=[${update_freq}] \ + \ + distributed_training.distributed_world_size=${world_size} \ + distributed_training.distributed_port=-1 \ + \ + common.log_interval=100 \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=${exp_name} + + + +sleep 20s + + # \ + # lr_scheduler._name="polynomial_decay" \ + # +lr_scheduler.warmup_updates=5000 \ + + +# /mnt/default/v-ziqzhang/data/stbert-ed/exp/ST_enes/sc2t_base_ende_32gpu_1accum/checkpoint_204_400000.pt diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/finetune_esen.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/finetune_esen.sh new file mode 100644 index 0000000000000000000000000000000000000000..a9051f67008817d200c797b67ee4919ed5e2797a --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/finetune_esen.sh @@ -0,0 +1,85 @@ +# #################################### +# Hubert ED 
model # +# #################################### +#source /mnt/default/v-ziqzhang/.bashrc_sing + +[ $# -lt 4 ] && echo "Usage: $0 " && exit 0 +world_size=$1 +update_freq=$2 +w2v_path=$3 +cpt=$4 +Mount=$5 + +[ -z $world_size ] && world_size=1 +[ -z $update_freq ] && update_freq=1 +[ -z $w2v_path ] && echo "you must specify a wav_path !" && exit 1 +[ -z $cpt ] && cpt=030.pt +[ -z $Mount ] && Mount=/mnt/default + + +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku +CONFIG_DIR=/mnt/output/users/v-kunwei/code/stpretrain_scripts/config +DATA_DIR="/mnt/output/users/v-kunwei/data/s2s_data/fin_esen" + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="/mnt/output/users/v-kunwei/data/s2s_data/finetune/tune_ST_from_esen" +exp_name="tune_esen_lr5e-5_from_$cpt" +MODEL_DIR=$MODEL_DIR/$exp_name +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +max_tokens=4900 + +python $FAIRSEQ_ROOT/fairseq_cli/hydra_train.py \ + --config-dir $CONFIG_DIR/finetune_asr \ + --config-name base_100h \ + \ + +task.store_labels=true \ + task.labels='["spm"]' \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.add_decoder=true \ + +task.max_keep_size=4900 \ + \ + +model.reuse_text_emb=true \ + model._name="stbert_st" \ + model.w2v_path=${w2v_path} \ + model.add_decoder=true \ + \ + criterion._name="label_smoothed_cross_entropy" \ + +criterion.label_smoothing=0.2 \ + +criterion.report_accuracy=true \ + \ + lr_scheduler._name="polynomial_decay" \ + +lr_scheduler.warmup_updates=20000 \ + \ + optimization.lr=[0.0002] \ + optimization.max_update=100000 \ + checkpoint.best_checkpoint_metric="accuracy" \ + checkpoint.maximize_best_checkpoint_metric=true \ + checkpoint.save_interval=1 \ + \ + dataset.train_subset="train" \ + dataset.valid_subset="valid" \ + dataset.max_tokens=$max_tokens \ + optimization.update_freq=[${update_freq}] \ + \ + distributed_training.distributed_world_size=${world_size} \ + distributed_training.distributed_port=-1 \ + \ + common.log_interval=100 \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=${exp_name} + + + +sleep 20s + + # \ + # lr_scheduler._name="polynomial_decay" \ + # +lr_scheduler.warmup_updates=5000 \ + + +# /mnt/default/v-ziqzhang/data/stbert-ed/exp/ST_enes/sc2t_base_ende_32gpu_1accum/checkpoint_204_400000.pt diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/inference_ed.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/inference_ed.sh new file mode 100644 index 0000000000000000000000000000000000000000..3fd9ef1231c827d980077a30b278b8986d31c4d7 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/inference_ed.sh @@ -0,0 +1,38 @@ +##################################### +# Hubert base model # +##################################### +[ $# -lt 1 ] && echo "Usage: $0 " && exit 0 + +model_path=$1 +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +#beam_size=$2 +gen_set=$2 +#lang=$4 +[ -z $gen_set ] && gen_set="test_et" +[ -z $beam_size ] && beam_size=2 +[ -z $lang ] && lang="fr" + + +#DATA_DIR=/mnt/output/users/v-kunwei/data/s2s_data/fin_enes +DATA_DIR=/home/v-kunwei +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku + +for subset in $gen_set; do + results_path=$src_dir/decode_${cpt}_beam${beam_size}/${subset} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $FAIRSEQ_ROOT/fairseq_cli/generate.py \ + $DATA_DIR --label-dir ${DATA_DIR} \ + --labels '["spm"]' --gen-subset ${subset} \ + --max-tokens 9000000 --task hubert_pretraining \ + --add-decoder --fine-tuning --random-crop \ + --path ${model_path} --results-path /home/v-kunwei --scoring sacrebleu \ + --max-len-a 0 --max-len-b 900 \ + --beam 10 --single-target + + tail -n 1 /home/v-kunwei/generate-*.txt + sleep 1s +done diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/base_ReleaseIter2_text2unicode_from400k.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/base_ReleaseIter2_text2unicode_from400k.sh new file mode 100644 index 0000000000000000000000000000000000000000..34d1594d8fda2954b8a70dbdfc059402571d70ee --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/base_ReleaseIter2_text2unicode_from400k.sh @@ -0,0 +1,70 @@ +##################################### +# Hubert mt model # +##################################### +[ $# -gt 3 ] && echo "Usage: $0 " && exit 0 +world_size=$1 +update_freq=$2 +w2v_path=$3 +Mount="" + +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=1 +[ -z $w2v_path ] && w2v_path="/mnt/output/users/v-kunwei/data/s2s_data/model_wo_emb_32_1004.pt" + + +langs="ltr,kmu" +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku +CONFIG_ROOT=/mnt/output/users/v-kunwei/code/stpretrain_scripts/config/translation +DATA_DIR=/mnt/output/users/v-kunwei/data/s2s_data/en_asr_data/ + +### set save-dir +MODEL_DIR="/mnt/output/users/v-kunwei/data/s2s_data/exp/text2unicode_en" +exp_name="base_pt400k_releaseiter2_${world_size}gpu_${update_freq}accum_lr1e-4_alll" +MODEL_DIR=$MODEL_DIR/$exp_name +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + + +python $FAIRSEQ_ROOT/fairseq_cli/hydra_train.py \ + --config-dir $CONFIG_ROOT \ + --config-name text2code \ + +task.data=$DATA_DIR \ + dataset.dataset_impl="raw" \ + +task.source_lang="ltr" +task.target_lang="kmu" \ + +task.normalize=false \ + \ + +criterion.label_smoothing=0.1 \ + +criterion.report_accuracy=true \ + optimizer.weight_decay=0.00001 \ + +lr_scheduler.lr="[0.0001]" \ + optimization.max_update=500000 \ + \ + +model.dropout=0.1 \ + +model.attention_dropout=0.1 \ + model.activation_dropout=0.1 \ + model.decoder_layerdrop=0 \ + model.layerdrop=0 \ + model.w2v_path=$w2v_path \ + +model.text_transformer_encoder_layers=6 \ + \ + dataset.train_subset="en_train" \ + dataset.valid_subset="en_dev" \ + optimization.update_freq=[${update_freq}] \ + optimization.clip_norm=5 \ + \ + common.seed=222 \ + common.log_interval=100 \ + common.log_format="json" \ + \ + distributed_training.distributed_world_size=${world_size} \ + distributed_training.nprocs_per_node=8 \ + distributed_training.ddp_backend="legacy_ddp" \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=${exp_name} \ + +sleep 10s + # sleep infinity + + diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/base_ReleaseIter2_text2unicode_from400k_es.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/base_ReleaseIter2_text2unicode_from400k_es.sh new file mode 100644 index 0000000000000000000000000000000000000000..1caf2f97f4b01def88b91d8a8422588f4f7a26d5 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/base_ReleaseIter2_text2unicode_from400k_es.sh @@ -0,0 +1,70 @@ +##################################### +# Hubert mt 
model # +##################################### +[ $# -gt 3 ] && echo "Usage: $0 " && exit 0 +world_size=$1 +update_freq=$2 +w2v_path=$3 +Mount="" + +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=1 +[ -z $w2v_path ] && w2v_path="/mnt/output/users/v-kunwei/data/s2s_data/model_es_emb_90_1004.pt" + + +langs="ltr,kmu" +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku +CONFIG_ROOT=/mnt/output/users/v-kunwei/code/stpretrain_scripts/config/translation +DATA_DIR=/mnt/output/users/v-kunwei/data/s2s_data/es_no_data/ + +### set save-dir +MODEL_DIR="/mnt/output/users/v-kunwei/data/s2s_data/exp/text2unicode_es" +exp_name="base_pt400k_releaseiter2_${world_size}gpu_${update_freq}accum_lr1e-4_no" +MODEL_DIR=$MODEL_DIR/$exp_name +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + + +python $FAIRSEQ_ROOT/fairseq_cli/hydra_train.py \ + --config-dir $CONFIG_ROOT \ + --config-name text2code \ + +task.data=$DATA_DIR \ + dataset.dataset_impl="raw" \ + +task.source_lang="ltr" +task.target_lang="kmu" \ + +task.normalize=false \ + \ + +criterion.label_smoothing=0.1 \ + +criterion.report_accuracy=true \ + optimizer.weight_decay=0.00001 \ + +lr_scheduler.lr="[0.0001]" \ + optimization.max_update=500000 \ + \ + +model.dropout=0.1 \ + +model.attention_dropout=0.1 \ + model.activation_dropout=0.1 \ + model.decoder_layerdrop=0 \ + model.layerdrop=0 \ + model.w2v_path=$w2v_path \ + +model.text_transformer_encoder_layers=6 \ + \ + dataset.train_subset="es_train" \ + dataset.valid_subset="es_dev" \ + optimization.update_freq=[${update_freq}] \ + optimization.clip_norm=5 \ + \ + common.seed=222 \ + common.log_interval=100 \ + common.log_format="json" \ + \ + distributed_training.distributed_world_size=${world_size} \ + distributed_training.nprocs_per_node=8 \ + distributed_training.ddp_backend="legacy_ddp" \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=${exp_name} \ + +sleep 10s + # sleep infinity + + diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/base_ReleaseIter2_text2unicode_from400k_es2.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/base_ReleaseIter2_text2unicode_from400k_es2.sh new file mode 100644 index 0000000000000000000000000000000000000000..910a6f35e43a0451b241a2033236039f009f0f75 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/base_ReleaseIter2_text2unicode_from400k_es2.sh @@ -0,0 +1,70 @@ +##################################### +# Hubert mt model # +##################################### +[ $# -gt 3 ] && echo "Usage: $0 " && exit 0 +world_size=$1 +update_freq=$2 +w2v_path=$3 +Mount="" + +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=1 +[ -z $w2v_path ] && w2v_path="/mnt/output/users/v-kunwei/data/s2s_data/model_es_emb_81_1004.pt" + + +langs="ltr,kmu" +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku +CONFIG_ROOT=/mnt/output/users/v-kunwei/code/stpretrain_scripts/config/translation +DATA_DIR=/mnt/output/users/v-kunwei/data/s2s_data/es_asrl_data/ + +### set save-dir +MODEL_DIR="/mnt/output/users/v-kunwei/data/s2s_data/exp/text2unicode_es" +exp_name="base_pt400k_releaseiter2_${world_size}gpu_${update_freq}accum_lr1e-4_ll" +MODEL_DIR=$MODEL_DIR/$exp_name +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + + +python $FAIRSEQ_ROOT/fairseq_cli/hydra_train.py \ + --config-dir $CONFIG_ROOT \ + --config-name text2code \ + +task.data=$DATA_DIR \ + dataset.dataset_impl="raw" \ + +task.source_lang="ltr" 
+task.target_lang="kmu" \ + +task.normalize=false \ + \ + +criterion.label_smoothing=0.1 \ + +criterion.report_accuracy=true \ + optimizer.weight_decay=0.00001 \ + +lr_scheduler.lr="[0.0001]" \ + optimization.max_update=500000 \ + \ + +model.dropout=0.1 \ + +model.attention_dropout=0.1 \ + model.activation_dropout=0.1 \ + model.decoder_layerdrop=0 \ + model.layerdrop=0 \ + model.w2v_path=$w2v_path \ + +model.text_transformer_encoder_layers=6 \ + \ + dataset.train_subset="es_train" \ + dataset.valid_subset="es_dev" \ + optimization.update_freq=[${update_freq}] \ + optimization.clip_norm=5 \ + \ + common.seed=222 \ + common.log_interval=100 \ + common.log_format="json" \ + \ + distributed_training.distributed_world_size=${world_size} \ + distributed_training.nprocs_per_node=8 \ + distributed_training.ddp_backend="legacy_ddp" \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=${exp_name} \ + +sleep 10s + # sleep infinity + + diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/decode_text2code.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/decode_text2code.sh new file mode 100644 index 0000000000000000000000000000000000000000..866146d4a26cea23c4dc51d5f53c90f58bfadc21 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/decode_text2code.sh @@ -0,0 +1,51 @@ + +##################################### +# Hubert ED model # +##################################### +[ $# -lt 1 ] && echo "Usage: $0 " && exit 0 +#source /mnt/default/v-ziqzhang/.bashrc_sing + +model_path=$1 +gen_set=$2 +tgt=$3 +src="ltr" +max_tokens=$4 +word_size=$5 +rank=$6 +outdir=$7 + +[ -z $tgt ] && tgt="kmu" +[ -z $gen_set ] && gen_set="dev_clean" +[ -z $word_size ] && word_size=1 +[ -z $rank ] && rank=0 +[ -z $max_tokens ] && max_tokens=2000 + +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlst +DATA_DIR=${gen_set%/*} +gen_set=${gen_set##*/} +[ $gen_set == "test" ] && DATA_DIR=/mnt/output/users/v-kunwei/data/s2s_data/en_asr_data +[ -z $outdir ] && outdir=$DATA_DIR + + +results_path=$outdir/pseudo_${gen_set}_${rank} +[ ! 
-d $results_path ] && mkdir -p $results_path + +for subset in $gen_set; do + python $FAIRSEQ_ROOT/fairseq_cli/generate_mt_label.py $DATA_DIR \ + --path ${model_path} \ + --task "translation_from_jst" \ + --max-target-positions 3000 \ + --gen-subset $subset \ + -t $tgt -s "ltr" \ + --max-tokens ${max_tokens} \ + --dataset-impl "raw" \ + --max-len-a 2 --max-len-b 100 \ + --results-path $results_path \ + --skip-invalid-size-inputs-valid-test \ + --distributed-world-size $word_size --distributed-rank $rank \ + + echo "$model" > $results_path/model.record + sleep 1s +done | tee $results_path/decode.log + +sleep 2s diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/decode_text2code_beam2.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/decode_text2code_beam2.sh new file mode 100644 index 0000000000000000000000000000000000000000..9cad721b3dfcf0bbca8d82b57290dacb616b74b2 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/decode_text2code_beam2.sh @@ -0,0 +1,52 @@ + +##################################### +# Hubert ED model # +##################################### +[ $# -lt 1 ] && echo "Usage: $0 " && exit 0 +#source /mnt/default/v-ziqzhang/.bashrc_sing + +model_path=$1 +gen_set=$2 +tgt=$3 +src="ltr" +max_tokens=$4 +word_size=$5 +rank=$6 +outdir=$7 + +[ -z $tgt ] && tgt="kmu" +[ -z $gen_set ] && gen_set="dev_clean" +[ -z $word_size ] && word_size=1 +[ -z $rank ] && rank=0 +[ -z $max_tokens ] && max_tokens=2000 + +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlstku +DATA_DIR=${gen_set%/*} +gen_set=${gen_set##*/} +[ $gen_set == "test" ] && DATA_DIR=/mnt/output/users/v-kunwei/code/fairseq_mlstku +[ -z $outdir ] && outdir=$DATA_DIR + + +results_path=$outdir/pseudo_${gen_set}_${rank} +[ ! 
-d $results_path ] && mkdir -p $results_path + +for subset in $gen_set; do + python $FAIRSEQ_ROOT/fairseq_cli/generate_mt_label.py $DATA_DIR \ + --path ${model_path} \ + --task "translation_from_jst" \ + --max-target-positions 3000 \ + --gen-subset $subset \ + -t $tgt -s "ltr" \ + --dataset-impl "raw" \ + --max-tokens ${max_tokens} \ + --beam 2 \ + --max-len-a 2 --max-len-b 100 \ + --results-path $results_path \ + --skip-invalid-size-inputs-valid-test \ + --distributed-world-size $word_size --distributed-rank $rank \ + + echo "$model" > $results_path/model.record + sleep 1s +done | tee $results_path/decode.log + +sleep 2s diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/inference_code_bleu.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/inference_code_bleu.sh new file mode 100644 index 0000000000000000000000000000000000000000..240d4874c02fb1b06c18af32382ae4aee3297113 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/inference_code_bleu.sh @@ -0,0 +1,52 @@ + +##################################### +# Hubert ED model # +##################################### +[ $# -lt 1 ] && echo "Usage: $0 " && exit 0 + +model_path=$1 +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +gen_set=$2 +tgt=$3 +outdir=$4 +src="ltr" +[ -z $tgt ] && tgt="kmu" +[ -z $gen_set ] && gen_set="es_dev" +[ -z $outdir ] && outdir=$src_dir/decode_${cpt} + +DATA_DIR=/mnt/output/users/v-kunwei/data/s2s_data/es_asr_data/ +# DATA_DIR=/mnt/default/v-ziqzhang/data/stbert/data/librispeech/speech2c_joint_splitenc_400k/ltr-$tgt +# DATA_DIR=/mnt/default/v-ziqzhang/data/stbert/data/librispeech/speech2c_400k/ltr-$tgt +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlst + +langs="ltr,$tgt" + +for subset in $gen_set; do + results_path=$outdir/${subset} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $FAIRSEQ_ROOT/fairseq_cli/generate.py $DATA_DIR \ + --path ${model_path} \ + --task "translation_from_jst" \ + --max-target-positions 3000 \ + --gen-subset $subset \ + -t $tgt -s "ltr" --dataset-impl "raw" \ + --batch-size 16 \ + --max-len-a 2 --max-len-b 400 \ + --results-path $results_path \ + --scoring sacrebleu $extra + + echo $results_path + tail -n 1 $results_path/generate-*.txt + sleep 1s +done + +# --distributed-world-size 1000 --distributed-rank 0 \ + +sleep 2s + +# cat generate-newstest2020_enja.txt | grep "^D-" | cut -d'-' -f 2- | sort -n -k1 | cut -f3 > decode-newstest2020_enja.txt +# sacrebleu -t wmt20 -l en-ja -i decode-newstest2020_enja.txt --tokenize char diff --git a/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/inference_code_wer.sh b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/inference_code_wer.sh new file mode 100644 index 0000000000000000000000000000000000000000..8fa9670ff8629ccc857d55c7c07983cc3d2c700b --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/stpretrain_scripts/train_text2code/inference_code_wer.sh @@ -0,0 +1,53 @@ + +##################################### +# Hubert ED model # +##################################### +[ $# -lt 1 ] && echo "Usage: $0 " && exit 0 + +model_path=$1 +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +gen_set=$2 +tgt=$3 +outdir=$4 +src="ltr" +[ -z $tgt ] && tgt="kmu" +[ -z $gen_set ] && gen_set="en_dev" +[ -z $outdir ] && outdir=$src_dir/decode_${cpt} + +# DATA_DIR=/mnt/default/v-ziqzhang/data/stbert/data/librispeech/hubert_release_iter2_layer9_kmeans/ltr-$tgt +# DATA_DIR=/mnt/default/v-ziqzhang/data/stbert/data/librispeech/speech2c_joint_splitenc_400k/ltr-$tgt +#DATA_DIR=/mnt/default/v-ziqzhang/data/stbert/data/librispeech/speech2c_400k/ltr-$tgt +DATA_DIR=/mnt/output/users/v-kunwei/data/s2s_data/es_asr_data/ +FAIRSEQ_ROOT=/mnt/output/users/v-kunwei/code/fairseq_mlst + +langs="ltr,$tgt" + +for subset in $gen_set; do + results_path=$outdir/${subset} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $FAIRSEQ_ROOT/fairseq_cli/generate.py $DATA_DIR \ + --path ${model_path} \ + --task "translation_from_jst" \ + --max-target-positions 3000 \ + --gen-subset $subset \ + -t $tgt -s "ltr" --dataset-impl "raw" \ + --batch-size 16 \ + --max-len-a 2 --max-len-b 400 \ + --results-path $results_path \ + --scoring wer + + echo $results_path + tail -n 1 $results_path/generate-*.txt + sleep 1s +done + +# --distributed-world-size 1000 --distributed-rank 0 \ + +sleep 2s + +# cat generate-newstest2020_enja.txt | grep "^D-" | cut -d'-' -f 2- | sort -n -k1 | cut -f3 > decode-newstest2020_enja.txt +# sacrebleu -t wmt20 -l en-ja -i decode-newstest2020_enja.txt --tokenize char diff --git a/SpeechT5/Speech2S/speech2s/tasks/joint_sc2t_pretrain.py b/SpeechT5/Speech2S/speech2s/tasks/joint_sc2t_pretrain.py new file mode 100644 index 0000000000000000000000000000000000000000..db6e4e611f01d58f53ede5fd529fb9ceca44bcc8 --- /dev/null +++ b/SpeechT5/Speech2S/speech2s/tasks/joint_sc2t_pretrain.py @@ -0,0 +1,1004 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import os +import sys +from typing import Dict, List, Optional, Tuple +from pathlib import Path + +import numpy as np +from argparse import Namespace +from collections import OrderedDict + +import torch +from dataclasses import dataclass, field +from fairseq.data import ( + Dictionary, + encoders, + data_utils, + StripTokenDataset, + PrependTokenDataset, + AppendTokenDataset, + DenoisingDataset, + ConcatDataset, + FairseqDataset, + iterators, + ResamplingDataset, + MaskTokensDataset, + LanguagePairDataset, +) +from fairseq.data.audio.speech_to_text_joint_dataset import S2TJointDataConfig +from fairseq.data.shorten_dataset import maybe_shorten_dataset +# from fairseq.data.encoders.utils import get_whole_word_mask +from fairseq.dataclass.configs import FairseqDataclass +from fairseq.tasks import register_task +from fairseq.tasks.fairseq_task import FairseqTask +from fairseq.dataclass.constants import ChoiceEnum +from omegaconf import MISSING + +from speechut.data.multimodal_corpus_dataset import MultiCorpusDataset +from speechut.data.load_langpair_dataset import load_langpair_dataset +from speechut.data.language_trible_dataset import LanguageTripleDataset, load_langtriple_dataset +from speechut.data.hubert_dataset import HubertDataset + +logger = logging.getLogger(__name__) + +TOKENIZER_CHOICES = ChoiceEnum(["sentencepiece", "hubert_letters", "none"]) + +def _lang_token(lang: str): + return "".format(lang) + +def _lang_token_index(dic: Dictionary, lang: str): + """Return language token index.""" + idx = dic.index(_lang_token(lang)) + assert idx != dic.unk_index, "cannot find language token for lang {}".format(lang) + return idx + + +class LabelEncoder(object): + def __init__(self, dictionary: Dictionary) -> None: + self.dictionary = dictionary + + def __call__(self, label: str) -> List[str]: + return self.dictionary.encode_line( + label, append_eos=False, 
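+            # encode_line converts the raw label string into dictionary indices; EOS is
+            # not appended and unseen symbols are not added to the dictionary.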
add_if_not_exist=False, + ) + + +### wrap the initial get_whole_word_mask which needs bpe_tokenizer, +### here we just assume words are splited by "|" or "" +def get_whole_word_mask(args, dictionary): + def is_beginning_of_word(i): + if i < dictionary.nspecial: + # special elements are always considered beginnings + return True + tok = dictionary[i] + if tok.startswith("madeupword"): + return True + elif tok in ["", "", "", "", "|", ""]: + return True + else: + return False + + mask_whole_words = torch.ByteTensor( + list(map(is_beginning_of_word, range(len(dictionary)))) + ) + return mask_whole_words + +def get_repeative_start(tokens): + """ + tokens: torch.Tensor with repeative tokens + """ + length = len(tokens) + rep_start_id = tokens[:-1] != tokens[1:] + return torch.cat([torch.tensor([True]), rep_start_id]) + +@dataclass +class TextPretrainingConfig(FairseqDataclass): + ### added for joint pretraining + text_data: Optional[str] = field( + default=None, + metadata={ + "help": "if set, path to text data directory", + }, + ) + seed: Optional[int] = field( + default=1, + metadata={ + "help": "for ordered_indices in MulticorpusDataset", + }, + ) + tokens_per_sample: Optional[int] = field( + default=512, + metadata={ + "help": "max number of total tokens over all segments per sample for dataset", + }, + ) + tokens_per_sample_tgt: Optional[int] = field( + default=512, + metadata={ + "help": "max number of total tokens over all segments per target sample for dataset", + }, + ) + sample_break_mode: Optional[str] = field( + default="eos", + metadata={ + "help": "mode for breaking sentence", + }, + ) + mask: Optional[float] = field( + default=0.3, + metadata={ + "help": "fraction of words/subwords that will be masked", + }, + ) + leave_unmasked_prob: float = field( + default=0.1, + metadata={"help": "probability that a masked token is unmasked"}, + ) + mask_random: Optional[float] = field( + default=0.1, + metadata={ + "help": "instead of using [MASK], use random token this often", + }, + ) + freq_weighted_replacement: bool = field( + default=False, + metadata={"help": "sample random replacement words based on word frequencies"}, + ) + mask_whole_words: bool = field( + default=True, + metadata={"help": "mask whole words; you may also want to set --bpe"}, + ) + mask_repeative_tokens: bool = field( + default=True, + metadata={"help": "mask repeative_tokens; if mask_whole_words=False"}, + ) + mask_multiple_length: int = field( + default=1, + metadata={"help": "repeat the mask indices multiple times"}, + ) + mask_stdev: float = field( + default=0.0, + metadata={"help": "stdev of the mask length"}, + ) + shorten_method: Optional[str] = field( + default="none", + metadata={ + "help": "if not none, shorten sequences that exceed tokens_per_sample", + "choices": "none/truncate/random_crop" + }, + ) + shorten_data_split_list: Optional[str] = field( + default="", + metadata={ + "help": "comma_separated list of dataset splits to apply shortening to, e.g., train,valid (default: all dataset splits)", + }, + ) + + ### below hypra-parameters is used in bart + insert: Optional[float] = field( + default=0.0, + metadata={ + "help": "insert this percentage of additional random tokens", + }, + ) + permute: Optional[float] = field( + default=0.0, + metadata={ + "help": "take this proportion of subwords and permute them", + }, + ) + rotate: Optional[float] = field( + default=0.0, + metadata={ + "help": "rotate this proportion of inputs", + }, + ) + poisson_lambda: Optional[float] = field( + default=3.5, + metadata={ + 
"help": "randomly shuffle sentences for this proportion of inputs", + }, + ) + permute_sentences: Optional[float] = field( + default=0.0, + metadata={ + "help": "shuffle this proportion of sentences in all inputs", + }, + ) + mask_length: Optional[str] = field( + default="span-poisson", + metadata={ + "help": "mask length to choose", + "choice": "subword/word/span-poisson" + }, + ) + replace_length: Optional[int] = field( + default=1, + metadata={ + "help": "when masking N tokens, replace with 0, 1, or N tokens (use -1 for N)", + }, + ) + shuffle_instance: Optional[bool] = field( + default=False, + metadata={"help": "shuffle instance"}, + ) + max_source_positions: Optional[int] = field( + default=1024, + metadata={"help": "max number of tokens in the source sequence"}, + ) + max_target_positions: Optional[int] = field( + default=1024, + metadata={"help": "max number of tokens in the target sequence"}, + ) + bpe: Optional[str] = field( + default="", + metadata={ + "help": "will wrapped by the text_data_config yaml", + }, + ) + data_config: Optional[str] = field( + default=None, + metadata={ + "help": "a config yaml specify the bpe model of text data", + }, + ) + text_maxtokens_ratio: Optional[float] = field( + default=1.0, + metadata={ + "help": "for text, max_tokens = max_tokens * text_maxtokens_ratio / 320 ", + }, + ) + prepend_tgt_lang_tag: bool = field( + default=False, + metadata={"help": "prepend tgt_lang_tag to replace "}, + ) + mask_text_ratio: Optional[float] = field( + default=0.0, + metadata={ + "help": "mask_text_ratio, for paired data", + }, + ) + truncate_mono_source: bool = field( + default=True, + metadata={"help": "truncate mono source-side examples that exceed max-positions"}, + ) + + +@dataclass +class JointPretrainingConfig(FairseqDataclass): + data: str = field( + default=MISSING, metadata={"help": "path to speech data directory"} + ) + fine_tuning: bool = field( + default=False, metadata={"help": "set to true if fine-tuning Hubert"} + ) + labels: List[str] = field( + default_factory=lambda: ["ltr"], + metadata={ + "help": ( + "extension of the label files to load, frame-level labels for" + " pre-training, and sequence-level label for fine-tuning" + ) + }, + ) + label_dir: Optional[str] = field( + default=None, + metadata={ + "help": "if set, looks for labels in this directory instead", + }, + ) + label_rate: int = field( + default=-1, + metadata={"help": "label frame rate. -1 for sequence label"}, + ) + sample_rate: int = field( + default=16_000, + metadata={ + "help": "target sample rate. 
audio files will be up/down " + "sampled to this rate" + }, + ) + normalize: bool = field( + default=False, + metadata={ + "help": "if set, normalizes input to have 0 mean and unit variance" + }, + ) + enable_padding: bool = field( + default=False, + metadata={"help": "pad shorter samples instead of cropping"}, + ) + max_keep_size: Optional[int] = field( + default=None, + metadata={"help": "exclude sample longer than this"}, + ) + max_sample_size: Optional[int] = field( + default=None, + metadata={"help": "max sample size to crop to for batching"}, + ) + min_sample_size: Optional[int] = field( + default=None, + metadata={"help": "min sample size to crop to for batching"}, + ) + single_target: Optional[bool] = field( + default=False, + metadata={ + "help": "if set, AddTargetDatasets outputs same keys " + "as AddTargetDataset" + }, + ) + random_crop: Optional[bool] = field( + default=True, + metadata={"help": "always crop from the beginning if false"}, + ) + pad_audio: Optional[bool] = field( + default=False, + metadata={"help": "pad audio to the longest one in the batch if true"}, + ) + store_labels: Optional[bool] = field( + default=True, + metadata={"help": "store spm labels in memory, should be true when fine-tune with bpe"}, + ) + add_decoder_target: bool = field( + default=False, + metadata={"help": "contral the model architecture, if set True, load reduced unit as target"}, + ) + split_modality_batch: bool = field( + default=False, + metadata={"help": "whether create all samples of different modalities in a batch"}, + ) + speech_tgt_lang: str = field( + default="", + metadata={"help": "prepend to prev_output_tokens to replace , only used for decoder"}, + ) + speech_sampling_alpha: float = field( + default=0.2, + metadata={ + "help": "Hyper-parameter alpha = 1/T for temperature-based speech resampling." + "(alpha = 1 for no resampling)" + }, + ) + text_sampling_alpha: float = field( + default=0.2, + metadata={ + "help": "Hyper-parameter alpha = 1/T for temperature-based text resampling." 
+ "(alpha = 1 for no resampling)" + }, + ) + hubert_tokenizer: Optional[TOKENIZER_CHOICES] = field( + default="none", + metadata={"help": "which tokenizer for processing text"}, + ) + sp_path: Optional[str] = field( + default=None, + metadata={"help": "sentencepiece model path if using bpe tokenizer"}, + ) + text_cfg: TextPretrainingConfig = TextPretrainingConfig() + # For inference + ctc_weight: float = field( + default=0.0, + metadata={"help": "ctc weight during inference"}, + ) + lm_dict: Optional[str] = field( + default="dict.txt", + metadata={"help": "dict used for decoding with language model, should be in cfg.data/"}, + ) + +@register_task("joint_sc2t_pretraining", dataclass=JointPretrainingConfig) +class Jsc2tPretrainingTask(FairseqTask): + + cfg: JointPretrainingConfig + + def __init__( + self, + cfg: JointPretrainingConfig, + load_local_states: True, + ) -> None: + super().__init__(cfg) + + logger.info(f"current directory is {os.getcwd()}") + logger.info(f"JSTPretrainingTask Config {cfg}") + + self.cfg = cfg + self.fine_tuning = cfg.fine_tuning + self.blank_symbol = "" + + if load_local_states: + self.state.add_factory("hubert_tokenizer", self.build_tokenizer) + if self.cfg.text_cfg.text_data is not None and os.path.exists(self.cfg.text_cfg.text_data): + self.state.add_factory("text_dictionary", self.load_text_dictionary) + self.state.add_factory("text_src_dictionary", self.load_text_src_dictionary) + if cfg.fine_tuning: + self.state.add_factory("target_dictionary", self.load_dictionaries) + else: + self.state.add_factory("dictionaries", self.load_dictionaries) + + if cfg.text_cfg.data_config is not None: + self.text_data_cfg = S2TJointDataConfig(Path(f"{cfg.text_cfg.text_data}/{cfg.text_cfg.data_config}")) + self.cfg.text_cfg.bpe = self.text_data_cfg.bpe_tokenizer["bpe"] + else: + self.text_data_cfg = None + + @property + def source_dictionary(self) -> Optional[Dictionary]: + return None + + @property + def target_dictionary(self) -> Optional[Dictionary]: + return self.state.target_dictionary + + @property + def dictionaries(self) -> List[Dictionary]: + return self.state.dictionaries + + @property + def text_dictionary(self) -> Optional[Dictionary]: + return self.state.text_dictionary + + @property + def text_src_dictionary(self) -> Optional[Dictionary]: + return self.state.text_src_dictionary + + @property + def hubert_tokenizer(self): + return self.state.hubert_tokenizer + + def load_dictionaries(self): + label_dir = self.cfg.data if self.cfg.label_dir is None else self.cfg.label_dir + dictionaries = [Dictionary.load(f"{label_dir}/dict.{label}.txt") for label in self.cfg.labels] + if not self.cfg.fine_tuning: + for dictionary in dictionaries: + dictionary.add_symbol("") + return dictionaries[0] if self.cfg.fine_tuning else dictionaries + + def load_text_dictionary(self): + tgt_dict_path = f"{self.cfg.text_cfg.text_data}/{self.text_data_cfg.vocab_filename if self.text_data_cfg is not None else 'dict.txt'}" + if not os.path.isfile(tgt_dict_path): + raise FileNotFoundError(f"Dict not found: {tgt_dict_path}") + text_dictionary = Dictionary.load(tgt_dict_path) + self.mask_idx = text_dictionary.add_symbol("") + return text_dictionary + + def load_text_src_dictionary(self): + src_dict_path = f"{self.cfg.text_cfg.text_data}/{self.text_data_cfg.src_vocab_filename if self.text_data_cfg is not None else 'dict.txt'}" + if not os.path.isfile(src_dict_path): + raise FileNotFoundError(f"Dict not found: {src_dict_path}") + src_text_dictionary = Dictionary.load(src_dict_path) + self.mask_idx = 
src_text_dictionary.add_symbol("") + return src_text_dictionary + + @classmethod + def setup_task( + cls, cfg: JointPretrainingConfig, **kwargs + ) -> "Jsc2tPretrainingTask": + load_local_states = kwargs.get("load_local_states", True) + return cls(cfg, load_local_states) + + def get_label_dir(self) -> str: + if self.cfg.label_dir is None: + return self.cfg.data + return self.cfg.label_dir + + def load_paired_dataset(self, text_split, truncate_source=False): + text_split, lp = text_split.rsplit('.', 1) # e.g. "libritext.ltr-ltr" + if len(lp.split("-")) == 2: + src, tgt = lp.split("-") + if src == tgt: + logger.warn(f"| trying to load monolingual dataset {text_split}.{lp}, please check your task is right.") + paired_dataset = self.load_char_bart_dataset(f"{text_split}.{lp}.{tgt}") + return paired_dataset + paired_dataset = load_langpair_dataset( + self.cfg.text_cfg.text_data, + text_split, + src, + self.text_src_dictionary, + tgt, + self.text_dictionary, + combine=True, + dataset_impl=None, + upsample_primary=1, + left_pad_source=False, + left_pad_target=False, + max_source_positions=self.cfg.text_cfg.tokens_per_sample, + max_target_positions=self.cfg.text_cfg.tokens_per_sample, + truncate_source=truncate_source, + prepend_bos=False, + load_alignments=False, + append_source_id=True if self.cfg.text_cfg.prepend_tgt_lang_tag else False, + lang_format="" if self.cfg.text_cfg.prepend_tgt_lang_tag else "[{}]", + input_feeding=self.cfg.add_decoder_target, + ) + if self.cfg.text_cfg.mask_text_ratio > 0: + # add mask + self.mask_idx = self.text_src_dictionary.index("") + mask_whole_words = None + if self.cfg.text_cfg.mask_whole_words: + mask_whole_words = get_whole_word_mask(self.cfg.text_cfg, self.text_src_dictionary) + elif self.cfg.text_cfg.mask_repeative_tokens: + mask_whole_words = get_repeative_start + + src_dataset, src_unmasked_dataset = MaskTokensDataset.apply_mask( + paired_dataset.src, + self.text_src_dictionary, + pad_idx=self.text_src_dictionary.pad(), + mask_idx=self.mask_idx, + seed=self.cfg.text_cfg.seed, + mask_prob=self.cfg.text_cfg.mask_text_ratio, + leave_unmasked_prob=self.cfg.text_cfg.leave_unmasked_prob, + random_token_prob=self.cfg.text_cfg.mask_random, + freq_weighted_replacement=self.cfg.text_cfg.freq_weighted_replacement, + mask_whole_words=mask_whole_words, + mask_multiple_length=self.cfg.text_cfg.mask_multiple_length, + mask_stdev=self.cfg.text_cfg.mask_stdev, + ) + tgt_dataset = paired_dataset.tgt if paired_dataset.tgt is not None else src_unmasked_dataset + paired_dataset = LanguageTripleDataset( + src_dataset, + src_dataset.sizes, + self.text_src_dictionary, + src_unmasked_dataset, + src_unmasked_dataset.sizes, + self.text_src_dictionary, + tgt_dataset, + tgt_dataset.sizes, + self.text_dictionary, + left_pad_source=False, + left_pad_target=False, + align_dataset=None, + eos=None, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + ) + else: + src, ref, tgt = lp.split("-") + paired_dataset = load_langtriple_dataset( + self.cfg.text_cfg.text_data, + text_split, + src, + self.text_src_dictionary, + ref, + self.dictionaries[-1], + tgt, + self.text_dictionary, + combine=True, + dataset_impl=None, + upsample_primary=1, + left_pad_source=False, + left_pad_target=False, + max_source_positions=self.cfg.text_cfg.tokens_per_sample, + max_target_positions=self.cfg.text_cfg.tokens_per_sample, + truncate_source=truncate_source, + prepend_bos=False, + load_alignments=False, + append_source_id=True if self.cfg.text_cfg.prepend_tgt_lang_tag else False, + lang_format="" if 
self.cfg.text_cfg.prepend_tgt_lang_tag else "[{}]", + ) + return paired_dataset + + def load_dataset(self, split: str, epoch=1, **kwargs) -> None: + """ + Create Wav dataset for audio, and Index dataset for phonemized text, + then concatenate them to by fairseq.data.multi_corpus_dataset.MultiCorpusDataset. + """ + speech_splits = split.split('+')[0].split(',') + ### 1st, create a speech dataset using STSpeechDataset (modified from HubertDataset) + dicts = [self.target_dictionary] if self.cfg.fine_tuning else self.dictionaries + pad_list = [dict.pad() for dict in dicts] + eos_list = [dict.eos() for dict in dicts] + procs = [LabelEncoder(dict) for dict in dicts] + if self.cfg.speech_tgt_lang != "": + tgt_lang_idx = _lang_token_index(dicts[0], self.cfg.speech_tgt_lang) + logger.info(f"Will prepend <{tgt_lang_idx}> at the beginning of prev_output_tokens to replace ") + else: + tgt_lang_idx = None + + + # hubert v1: pad_audio=True, random_crop=False; + speech_datasets = [] + for speech_split in speech_splits: + paths = [ + f"{self.get_label_dir()}/{speech_split}.{l}" for l in self.cfg.labels + ] + speech_datasets.append( + HubertDataset( + f"{self.cfg.data}/{speech_split}.tsv", + sample_rate=self.cfg.sample_rate, + label_paths=paths, + label_rates=self.cfg.label_rate, + pad_list=pad_list, + eos_list=eos_list, + label_processors=procs, + max_keep_sample_size=self.cfg.max_keep_size, + min_keep_sample_size=self.cfg.min_sample_size, + max_sample_size=self.cfg.max_sample_size, + pad_audio=self.cfg.pad_audio, + normalize=self.cfg.normalize, + store_labels=self.cfg.store_labels, + random_crop=self.cfg.random_crop, + single_target=self.cfg.single_target, + tgt_dict=dicts[0], + add_decoder_target=self.cfg.add_decoder_target, + fine_tuning=self.cfg.fine_tuning, + tgt_lang_idx=tgt_lang_idx, + tokenizer=self.hubert_tokenizer, + ) + ) + if len(speech_datasets) > 1: + speech_dataset = ConcatDataset(speech_datasets) + else: + speech_dataset = speech_datasets[0] + + has_text = len(split.split('+')) > 1 + if not has_text: + assert speech_dataset is not None + self.datasets[split] = speech_dataset + return + + ### 2nd, create paired/mono text datasets using Langpairdataset + if split.split('+')[1] != '': + paired_splits = [paired_split for paired_split in split.split('+')[1].split(',') if paired_split != ''] + paired_datasets = [self.load_paired_dataset(paired_split) for paired_split in paired_splits] + else: + paired_splits, paired_datasets = [], [] + + if len(split.split('+')) > 2 and split.split('+')[2] != '': + mono_splits = [mono_split for mono_split in split.split('+')[2].split(',') if mono_split != ''] + mono_datasets = [self.load_paired_dataset(mono_split, truncate_source=self.cfg.text_cfg.truncate_mono_source) for mono_split in mono_splits] + else: + mono_splits, mono_datasets = [], [] + + assert len(mono_datasets + paired_datasets) > 0, f"split {split} has no text! you should check out for that" + + ### 3rd, if provided, create a supervised dataset with labeled data + if len(split.split('+')) > 3 and split.split('+')[3] != '': + assert len(paired_splits) > 0, f"supervised dataset can not be loaded without text paired dataset!" 
+ tgt = paired_splits[0].rsplit('.', 1)[1].split("-")[1] + sup_split = split.split('+')[3] + + sup_dataset = HubertDataset( + f"{self.cfg.data}/{sup_split}.tsv", + sample_rate=self.cfg.sample_rate, + label_paths=[f"{self.get_label_dir()}/{sup_split}.{tgt}"], + label_rates=[-1], + pad_list=[self.text_dictionary.pad()], + eos_list=[self.text_dictionary.eos()], + label_processors=[LabelEncoder(self.text_dictionary)], + max_keep_sample_size=self.cfg.max_keep_size, + min_keep_sample_size=None, + max_sample_size=None, + pad_audio=True, + normalize=self.cfg.normalize, + store_labels=self.cfg.store_labels, + random_crop=False, + single_target=True, + tgt_dict=self.text_dictionary, + add_decoder_target=self.cfg.add_decoder_target, + fine_tuning=True, + tgt_lang_idx=None, + tokenizer=None, + ) + else: + sup_dataset = None + + ### 4th, compose a MultiCorpusDataset + dataset_dict, max_positions_dict, distributions, max_tokens_ratios = self.resample_multi_modality_dataset( + speech_dataset, sup_dataset, mono_datasets, paired_datasets, mono_splits, paired_splits, epoch=epoch, + ) + self.datasets[split] = MultiCorpusDataset( + dataset_dict, + max_positions=max_positions_dict, + distribution=distributions, + max_tokens_ratio=max_tokens_ratios, + seed=self.cfg.text_cfg.seed, + sort_indices=True, + ) + + + def max_positions(self) -> Tuple[int, int]: + return (sys.maxsize, sys.maxsize) + + def filter_indices_by_size( + self, indices: np.array, *args, **kwargs + ) -> np.array: + return indices + + def get_batch_iterator( + self, + dataset, + max_tokens=None, + max_sentences=None, + max_positions=None, + ignore_invalid_inputs=False, + required_batch_size_multiple=1, + seed=1, + num_shards=1, + shard_id=0, + num_workers=0, + epoch=1, + data_buffer_size=0, + disable_iterator_cache=False, + skip_remainder_batch=False, + grouped_shuffling=False, + update_epoch_batch_itr=False, + ): + """ + Get an iterator that yields batches of data from the given dataset. + Args: + dataset (~fairseq.data.FairseqDataset): dataset to batch + max_tokens (int, optional): max number of tokens in each batch + (default: None). + max_sentences (int, optional): max number of sentences in each + batch (default: None). + max_positions (optional): max sentence length supported by the + model (default: None). + ignore_invalid_inputs (bool, optional): don't raise Exception for + sentences that are too long (default: False). + required_batch_size_multiple (int, optional): require batch size to + be a multiple of N (default: 1). + seed (int, optional): seed for random number generator for + reproducibility (default: 1). + num_shards (int, optional): shard the data iterator into N + shards (default: 1). + shard_id (int, optional): which shard of the data iterator to + return (default: 0). + num_workers (int, optional): how many subprocesses to use for data + loading. 0 means the data will be loaded in the main process + (default: 0). + epoch (int, optional): the epoch to start the iterator from + (default: 1). + data_buffer_size (int, optional): number of batches to + preload (default: 0). + disable_iterator_cache (bool, optional): don't cache the + EpochBatchIterator (ignores `FairseqTask::can_reuse_epoch_itr`) + (default: False). + skip_remainder_batch (bool, optional): if set, discard the last + batch in each training epoch, as the last batch is often smaller than + local_batch_size * distributed_word_size (default: ``True``). 
+ grouped_shuffling (bool, optional): group batches with each groups + containing num_shards batches and shuffle groups. Reduces difference + between sequence lengths among workers for batches sorted by length. + update_epoch_batch_itr (bool optional): if true then donot use the cached + batch iterator for the epoch + + Returns: + ~fairseq.iterators.EpochBatchIterator: a batched iterator over the + given dataset split + """ + if self.fine_tuning or not isinstance(dataset, MultiCorpusDataset): + return super().get_batch_iterator( + dataset, + max_tokens=max_tokens, + max_sentences=max_sentences, + max_positions=max_positions, + ignore_invalid_inputs=ignore_invalid_inputs, + required_batch_size_multiple=required_batch_size_multiple, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + data_buffer_size=data_buffer_size, + disable_iterator_cache=disable_iterator_cache, + skip_remainder_batch=skip_remainder_batch, + grouped_shuffling=grouped_shuffling, + update_epoch_batch_itr=update_epoch_batch_itr, + ) + + can_reuse_epoch_itr = ( + not disable_iterator_cache + and not update_epoch_batch_itr + and self.can_reuse_epoch_itr(dataset) + ) + if can_reuse_epoch_itr and dataset in self.dataset_to_epoch_iter: + logger.debug("reusing EpochBatchIterator for epoch {}".format(epoch)) + return self.dataset_to_epoch_iter[dataset] + + assert isinstance(dataset, FairseqDataset) + + # initialize the dataset with the correct starting epoch + dataset.set_epoch(epoch) + + # get indices ordered by example size + with data_utils.numpy_seed(seed): + indices = dataset.ordered_indices() + + # filter examples that are too large + if max_positions is not None: + indices = self.filter_indices_by_size( + indices, dataset, max_positions, ignore_invalid_inputs + ) + + # create mini-batches with given size constraints + batch_sampler = dataset.get_batch_sampler( + indices, + num_shards, + seed, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + split_modality_batch=self.cfg.split_modality_batch, + ) + + # return a reusable, sharded iterator + epoch_iter = iterators.EpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_sampler=batch_sampler, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + buffer_size=data_buffer_size, + skip_remainder_batch=skip_remainder_batch, + disable_shuffling=True, + grouped_shuffling=grouped_shuffling, + ) + + if can_reuse_epoch_itr: + self.dataset_to_epoch_iter[dataset] = epoch_iter + + return epoch_iter + + def build_generator( + self, + models, + args, + seq_gen_cls=None, + extra_gen_cls_kwargs=None, + ): + """Build ED-CTC generator for finet-tuned ASR model""" + from speechut.squence_generator import SequenceGenerator + extra_gen_cls_kwargs = { + "ctc_weight": self.cfg.ctc_weight, + "lm_dict": Dictionary.load(os.path.join(self.cfg.data, self.cfg.lm_dict)), + **extra_gen_cls_kwargs + } + return super().build_generator( + models, args, seq_gen_cls=SequenceGenerator, extra_gen_cls_kwargs=extra_gen_cls_kwargs + ) + + @classmethod + def _get_size_ratios(cls, ids: List[str], sizes: List[int], alpha: float = 1.0): + """Size ratios for temperature-based sampling + (https://arxiv.org/abs/1907.05019)""" + _sizes = np.array(sizes) + prob = _sizes / _sizes.sum() + smoothed_prob = prob ** alpha + smoothed_prob = smoothed_prob / smoothed_prob.sum() + size_ratio = (smoothed_prob * _sizes.sum()) / _sizes + + o_str = 
str({_i: f"{prob[i]:.3f}" for i, _i in enumerate(ids)}) + logger.info(f"original sampling probability: {o_str}") + p_str = str({_i: f"{smoothed_prob[i]:.3f}" for i, _i in enumerate(ids)}) + logger.info(f"balanced sampling probability: {p_str}") + sr_str = str({_id: f"{size_ratio[i]:.3f}" for i, _id in enumerate(ids)}) + logger.info(f"balanced sampling size ratio: {sr_str}") + return size_ratio.tolist() + + def resample_multi_modality_dataset(self, speech_dataset, sup_dataset, mono_datasets, paired_datasets, mono_splits, paired_splits, epoch=1, train=True): + assert len(mono_datasets+paired_datasets) > 0, f"No text data loaded!" + + if len(mono_datasets) > 1 and self.cfg.text_sampling_alpha != 1.0: + size_ratios = self._get_size_ratios( + mono_splits, [len(s) for s in mono_datasets], alpha=self.cfg.text_sampling_alpha + ) + mono_datasets = [ + ResamplingDataset( + d, size_ratio=r, seed=0, epoch=epoch, replace=(r >= 1.0) + ) for d, r in zip(mono_datasets, size_ratios) + ] + + if len(paired_datasets) > 1 and self.cfg.text_sampling_alpha != 1.0: + size_ratios = self._get_size_ratios( + paired_splits, [len(s) for s in paired_datasets], alpha=self.cfg.text_sampling_alpha + ) + paired_datasets = [ + ResamplingDataset( + d, size_ratio=r, seed=0, epoch=epoch, replace=(r >= 1.0) + ) for d, r in zip(paired_datasets, size_ratios) + ] + + dataset_list = [speech_dataset, sup_dataset] + for datasets in [mono_datasets, paired_datasets]: + if len(datasets) > 1: + dataset_list.append(ConcatDataset(datasets)) + elif len(datasets) == 1: + dataset_list.append(datasets[0]) + else: + dataset_list.append(None) + + ### match speech/text datasets according to modality + dataset_dict = OrderedDict((name, d) for name, d in zip(["speech", "speech_sup", "text_mono", "text_paired"], dataset_list) if d is not None) + max_positions_dict = { + "speech": None, + "speech_sup": None, + "text_mono": (self.cfg.text_cfg.tokens_per_sample, self.cfg.text_cfg.tokens_per_sample), + "text_paired": (self.cfg.text_cfg.tokens_per_sample, self.cfg.text_cfg.tokens_per_sample), + } + max_positions_dict = OrderedDict((name, max_positions_dict[name]) for name in dataset_dict.keys()) + max_tokens_ratios_dict = { + "speech": 1.0, + "speech_sup": 1.0, + "text_mono": 1.0 / 320 / self.cfg.text_cfg.text_maxtokens_ratio, + "text_paired": 1.0 / 320 / self.cfg.text_cfg.text_maxtokens_ratio, + } + max_tokens_ratios = [max_tokens_ratios_dict[name] for name in dataset_dict.keys()] + dataset_lens = np.array([len(dataset) for dataset in dataset_dict.values()]) + dataset_avg_sample_lens = np.array([ + sum([dataset.num_tokens(i) for i in np.random.randint(low=0, high=len(dataset), size=10000)]) / 10000.0 + for dataset in dataset_dict.values() + ]) + + if not "speech" in dataset_dict: + distributions = [l / sum(dataset_lens) for l in dataset_lens] + else: + ## we just keep the batches of speech and non-speech the same, expand_coef is to ensure speech batches is less than others + first_ratio = dataset_lens[0] / sum(dataset_lens) + expand_coef = 1.2 if sup_dataset is None else 1.1 * sum(dataset_lens[0:2]) / dataset_lens[0] + distributions = [expand_coef * max_tokens_ratios[i] * dataset_avg_sample_lens[0] / l for (i, l) in enumerate(dataset_avg_sample_lens)] + distributions[0] = 1.0 + if sup_dataset is not None: + distributions[1] = dataset_lens[1] / dataset_lens[0] + distributions = [first_ratio * d for d in distributions] + + logging.info(f"Number samples of datasets is {dataset_lens}") + logging.info(f"Avg sample length of datasets is 
{dataset_avg_sample_lens}") + logging.info(f"Sampling distributions is {distributions}") + logging.info(f"Maxtokens ratio is {max_tokens_ratios}") + return dataset_dict, max_positions_dict, distributions, max_tokens_ratios + + def build_tokenizer(self, cfg=None): + logger.info(f"tokenizer: {self.cfg.hubert_tokenizer}") + if self.cfg.hubert_tokenizer != "none": + return encoders.build_bpe(Namespace(**{"bpe": self.cfg.hubert_tokenizer, "sentencepiece_model": self.cfg.sp_path})) + else: + return None + + def load_char_bart_dataset(self, split): + mono_dataset = data_utils.load_indexed_dataset( + f"{self.cfg.text_cfg.text_data}/{split}", + self.text_dictionary, + ) + mono_dataset = StripTokenDataset(mono_dataset, self.text_dictionary.eos()) + mono_dataset = maybe_shorten_dataset( + mono_dataset, + split, + self.cfg.text_cfg.shorten_data_split_list, + self.cfg.text_cfg.shorten_method, + self.cfg.text_cfg.tokens_per_sample - 2, + self.cfg.text_cfg.seed, + ) + logger.info("loaded {} samples from: {}".format(len(mono_dataset), mono_dataset)) + ### prepend bos and eos to dataset + mono_dataset = PrependTokenDataset(mono_dataset, self.text_dictionary.bos()) + mono_dataset = AppendTokenDataset(mono_dataset, self.text_dictionary.eos()) + mask_whole_words = ( + get_whole_word_mask(None, self.text_dictionary) + if self.cfg.text_cfg.mask_whole_words + else None + ) + lang=self.cfg.speech_tgt_lang + mono_dataset = DenoisingDataset( + mono_dataset, + mono_dataset.sizes, + self.text_dictionary, + self.mask_idx, + mask_whole_words, + shuffle=self.cfg.text_cfg.shuffle_instance, + seed=self.cfg.text_cfg.seed, + args=self.cfg.text_cfg, + tgt_lang_idx=_lang_token_index(self.text_dictionary, lang) if self.cfg.text_cfg.prepend_tgt_lang_tag else None, + ) + + return mono_dataset diff --git a/SpeechT5/SpeechLM/README.md b/SpeechT5/SpeechLM/README.md new file mode 100644 index 0000000000000000000000000000000000000000..11923ca3022332fd6f9e02b634ec871dde7b164b --- /dev/null +++ b/SpeechT5/SpeechLM/README.md @@ -0,0 +1,268 @@ +# SpeechLM + + + + [**SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data**](https://arxiv.org/abs/2209.15329) + +- June 2023: We have corrected the errors in the pre-training data for SpeechLM-P Base models, and new results are updated. + +- April 2023: We discovered some errors about the data in the pre-training experiments, which will affect all the results about SpeechLM-P Base models. We are re-conducting the related experiments and will update the paper with the new results. 
+ +- (Done) Oct 2022: release the code and models +- Oct 2022: release preprint in [arXiv](https://arxiv.org/abs/2209.15329) + +## Pre-Trained and Fine-tuned Models + +| Model | Pre-training Dataset | Fine-tuning Dataset | Model | +| :------: | :----------------------------------------------: | :-----------------: | :-----: | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechlm/update/speechlm_checkpoint_298_400000.pt?sv=2020-04-08&st=2023-06-19T10%3A35%3A37Z&se=2033-06-20T10%3A35%3A00Z&sr=b&sp=r&sig=xPzDV3Zm7l7Mp4dgMxAYMOcoZfVJjlbBglqD7uw2XW0%3D) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [Azure Storage](https://valle.blob.core.windows.net/share/speechlm/update/checkpoint_best_asr_ft.pt?sv=2020-04-08&st=2023-06-19T10%3A36%3A39Z&se=2033-06-20T10%3A36%3A00Z&sr=b&sp=r&sig=xbS2hGAlTr7K6JJdBN0nKrPtITZE62eT%2FoEK3MBsnZs%3D) | +| SpeechLM-H Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | - | [Google drive](https://drive.google.com/file/d/1eblW8U8f9t-NTuCNRrNHwr-8BeLAUAmQ/view?usp=sharing) | +| SpeechLM-H Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [Google drive](https://drive.google.com/file/d/1vXyO5DolbiWiTYZ6pkkKQsu2wJetaPlv/view?usp=sharing) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [En-De CoVoST-2](https://github.com/facebookresearch/covost) | [Azure Storage](https://valle.blob.core.windows.net/share/speechlm/update/checkpoint_best_ende_ft.pt?sv=2020-04-08&st=2023-06-19T10%3A37%3A23Z&se=2033-06-20T10%3A37%3A00Z&sr=b&sp=r&sig=bNET3bF240rQg%2B%2F87WC%2FJ1cMojI0WEIoqwEfM7PyQUE%3D) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [En-Ca CoVoST-2](https://github.com/facebookresearch/covost) | [Azure Storage](https://valle.blob.core.windows.net/share/speechlm/update/checkpoint_best_enca_ft.pt?sv=2020-04-08&st=2023-06-19T10%3A37%3A46Z&se=2033-06-20T10%3A37%3A00Z&sr=b&sp=r&sig=9H1XMRiAU8tz%2B9Ri4sUGP0kZFiiQ5cSVqAqShZAhIzY%3D) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [En-Ar CoVoST-2](https://github.com/facebookresearch/covost) | [Azure Storage](https://valle.blob.core.windows.net/share/speechlm/update/checkpoint_best_enar_ft.pt?sv=2020-04-08&st=2023-06-19T10%3A38%3A05Z&se=2033-06-20T10%3A38%3A00Z&sr=b&sp=r&sig=mvlF1vmbW9mr66dP3wW9M%2BiU7ASluD4xqCbxblYPCOw%3D) | +| SpeechLM-P Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [En-Tr CoVoST-2](https://github.com/facebookresearch/covost) | [Azure Storage](https://valle.blob.core.windows.net/share/speechlm/update/checkpoint_best_entr_ft.pt?sv=2020-04-08&st=2023-06-19T10%3A38%3A29Z&se=2033-06-20T10%3A38%3A00Z&sr=b&sp=r&sig=Wda6nh9AVlcJAI6PamiEuHeeCwi4Yudva060qGORbSc%3D) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | - | [Google drive](https://drive.google.com/file/d/1QjLIgTJKIylVIp5hUkfSjGPtz8Xo7Lky/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M 
Text](http://www.openslr.org/11) | [960 hrs LibriSpeech](http://www.openslr.org/12) | [Google drive](https://drive.google.com/file/d/1YZQDVv096o8Opt0RBnkRiZXYPRDqKZnP/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | [En-De CoVoST-2](https://github.com/facebookresearch/covost) | [Google drive](https://drive.google.com/file/d/1qYygNWSc11TQbBI1OzC4ChlR-dNh8t9S/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | [En-Ca CoVoST-2](https://github.com/facebookresearch/covost) | [Google drive](https://drive.google.com/file/d/162U88mwso2aVfzzPkEM2nP_vwTpcb57T/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | [En-Ar CoVoST-2](https://github.com/facebookresearch/covost) | [Google drive](https://drive.google.com/file/d/1lbTSRXewEeb2t45URunD6EiJcbniyjWW/view?usp=sharing) | +| SpeechLM-P Large | [60k hrs LibriLight](https://github.com/facebookresearch/libri-light) + [40M Text](http://www.openslr.org/11) | [En-Tr CoVoST-2](https://github.com/facebookresearch/covost) | [Google drive](https://drive.google.com/file/d/1Er4I_jHS175pQQph223yKtiiLQ378VvH/view?usp=sharing) | + + +## Extract features using pre-trained models +For easier use of our pre-trained models, we merge all inference-related code to [`SpeechLM.py`](SpeechLM.py) and make cleaned checkpoints [~~`SpeechLM-P Base`~~](https://valle.blob.core.windows.net/share/speechlm/speechlmp_base_checkpoint_clean.pt?sv=2020-04-08&st=2023-04-04T05%3A42%3A17Z&se=2033-04-05T05%3A42%3A00Z&sr=b&sp=r&sig=DN7VwaEWhrhRPiyuT84mJpohrMeJsEPq4o6qRr8BNsk%3D) [`SpeechLM-H Base`](https://valle.blob.core.windows.net/share/speechlm/speechlmh_base_checkpoint_clean.pt?sv=2020-04-08&st=2023-04-04T05%3A43%3A07Z&se=2033-04-05T05%3A43%3A00Z&sr=b&sp=r&sig=T9oaIvrb3z3Wo5GTZp8eP2x7B7yuQ%2B80Ff1KhuWrrKg%3D) [`SpeechLM-P Large`](https://valle.blob.core.windows.net/share/speechlm/speechlmp_large_checkpoint_clean.pt?sv=2020-04-08&st=2023-04-04T05%3A43%3A33Z&se=2033-04-05T05%3A43%3A00Z&sr=b&sp=r&sig=qfWBNdiIGuDgkgUiHXaWnPiVbUHm3VSp%2FHTlWrCghRk%3D) by removing non-required modules. Now you can directly use the following script to extract your speech features: +```python +import torch +import torch.nn.functional as F +from SpeechLM import SpeechLMConfig, SpeechLM + +checkpoint = torch.load('path/to/the/cleaned/checkpoint.pt') +cfg = SpeechLMConfig(checkpoint['cfg']['model']) +model = SpeechLM(cfg) +model.load_state_dict(checkpoint['model']) +model.eval() + +wav_input_16khz = torch.randn(1,10000) +normalize = checkpoint['cfg']['task']['normalize'] # False for base model, True for large model +if normalize: + wav_input_16khz = F.layer_norm(wav_input_16khz[0], wav_input_16khz[0].shape).unsqueeze(0) + +# extract the representation of last layer +rep = model.extract_features(wav_input_16khz)[0] + +# extract the representation of each layer +output_layer = model.cfg.encoder_layers + model.cfg.text_transformer.encoder.layers +rep, layer_results = model.extract_features(wav_input_16khz, output_layer=output_layer, ret_layer_results=True)[0] +layer_reps = [x.transpose(0, 1) for x in layer_results] +``` + + +## Setup +To fine-tune or pre-train more models, please follow the instructions below. 
+ +```bash +git submodule update --init SpeechLM/fairseq +cd SpeechLM/ +pip install --editable fairseq/ +pip install sacrebleu==1.5.1 +``` + +## ASR on LibriSpeech +### Data preparation +Please follow the steps of wav2vec 2.0 manifest [here](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec#prepare-training-data-manifest) to prepare `train.tsv` and `train.ltr`. You should make sure the vocabulary [`dict.ltr.txt`](dataset/LibriSpeech/asr/dict.ltr.txt) is the same as that used for the pre-trained model. + +Put yout prepared data into `$data_dir`, we provided eamples in [`dataset/LibriSpeech/asr`](dataset/LibriSpeech/asr/). + +### Fine-tune a CTC model +- Fine-tune the base model + ```bash + # Usage: speechlm/scripts/tune_speechlm_asr/finetune_base_ctc.sh [mount=$PWD] [world_size=8] [update_freq=1] + model_path=path/to/your/pre-trained/model + data_dir=dataset/LibriSpeech/asr + bash speechlm/scripts/tune_speechlm_asr/finetune_base_ctc.sh $model_path $data_dir 'tag400k' + ``` +- Fine-tune the large model + ```bash + # Usage: speechlm/scripts/tune_speechlm_asr/finetune_large_ctc.sh [mount=$PWD] [world_size=8] [update_freq=4] + model_path=path/to/your/pre-trained/model + data_dir=dataset/LibriSpeech/asr + bash speechlm/scripts/tune_speechlm_asr/finetune_large_ctc.sh $model_path $data_dir 'tag400k' + ``` +### Decode +- Directly decode a CTC model. + ```bash + # Usage: speechlm/scripts/tune_speechlm_asr/inference_ctc.sh [gen-set=dev_clean,dev_other,test_clean,test_other] + model_path=path/to/your/fine-tuned/model + data_dir=dataset/LibriSpeech/asr + bash speechlm/scripts/tune_speechlm_asr/inference_ctc.sh $model_path $data_dir + # for large models + # bash speechlm/scripts/tune_speechlm_asr/inference_ctc_large.sh $model_path $data_dir + ``` +- Decode with 4-gram language model using [flashlight](https://github.com/flashlight/flashlight/tree/main/bindings/python) and [kenlm](https://github.com/kpu/kenlm). + > Please put [4-gram.arpa](https://www.openslr.org/resources/11/4-gram.arpa.gz) and the word-to-letter lexicon [librispeech_lexicon.lst](https://drive.google.com/file/d/1q7IbNGqtwXnctjvuvpviQ4ZmepFHQmTO/view?usp=sharing) into `$data_dir`. + ```bash + # Usage: speechlm/scripts/tune_speechlm_asr/inference_ctc_kenlm.sh [gen-set=dev_clean,dev_other,test_clean,test_other] + model_path=path/to/your/fine-tuned/model + data_dir=dataset/LibriSpeech/asr + bash speechlm/scripts/tune_speechlm_asr/inference_ctc_kenlm.sh $model_path $data_dir + ``` +- Decode large models with fairseq-lm using [flashlight](https://github.com/flashlight/flashlight/tree/main/bindings/python). + > Please put [lm_librispeech_word_transformer.pt](https://dl.fbaipublicfiles.com/wav2letter/sota/2019/lm/lm_librispeech_word_transformer.pt) and its vocabulary [`dict.txt`](https://dl.fbaipublicfiles.com/wav2letter/sota/2019/lm/lm_librispeech_word_transformer.dict) into `$data_dir/fairseq_word_lm`, and the word-to-letter lexicon [librispeech_lexicon.lst](https://drive.google.com/file/d/1q7IbNGqtwXnctjvuvpviQ4ZmepFHQmTO/view?usp=sharing) into `$data_dir`. Capitalize the `dict.txt` to amke it compatible with the word-to-letter lexicon. + ```bash + # Usage: speechlm/scripts/tune_speechlm_asr/inference_ctc_large_fsqlm.sh [gen-set=dev_clean,dev_other,test_clean,test_other] + model_path=path/to/your/fine-tuned/model + data_dir=dataset/LibriSpeech/asr + bash speechlm/scripts/tune_speechlm_asr/inference_ctc_large_fsqlm.sh $model_path $data_dir dev_other + ``` + +## ST on CoVoST-2 +### Data Preparation +1. 
Download [Common Voice audio clips](https://commonvoice.mozilla.org/en/datasets) (version 4) for English into `$cv_root/en`. +2. Get data manifest. The following script will convert mp3 files to waveform, create tsv file containing speech/translation paires, create data config files. + ```bash + lang=de # ca,ar,tr + cv_root=dataset/CommonVoice/v4 + bash speechlm/data_process/prepare_covost2_enxx.sh $lang $cv_root + ``` + We provided examples in [`dataset/CommonVoice/v4/en/en-de`](dataset/CommonVoice/v4/en/en-de). + +### Fine-tune a encoder-decoder model +- Fine-tune the Base model (fine-tuned models will be stored in `$mount/exp/finetune_covost`). + + ```bash + model_path=path/to/your/pre-trained/model + lang=de # ca,ar,tr + data_dir=dataset/CommonVoice/v4/en/en-${lang} + # Usage (Base model): speechlm/scripts/tune_speechlm_st/ft_base_covost_enxx.sh [mount=$PWD] [world_size=8] [update_freq=2] + bash speechlm/scripts/tune_speechlm_st/ft_base_covost_enxx.sh $model_path $data_dir $lang 'tag400k' + ``` +- Fine-tune the Large model (fine-tuned models will be stored in `$mount/exp/finetune_covost`). + ```bash + # Usage (Large model): speechlm/scripts/tune_speechlm_st/ft_large_covost_enxx.sh [mount=$PWD] [world_size=8] [update_freq=4] + bash speechlm/scripts/tune_speechlm_st/ft_large_covost_enxx.sh $model_path $data_dir $lang 'tag400k' + ``` + +### Decode +- Decode the base model + ```bash + # Usage: speechlm/scripts/tune_speechlm_st/inference_base.sh [gen-set=dev] [beam_size=5] + model_path=path/to/your/fine-tuned/model + lang=de # ca,ar,tr + data_dir=dataset/CommonVoice/v4/en/en-${lang} + bash speechlm/scripts/tune_speechlm_st/inference_base.sh $model_path $data_dir $lang dev + ``` +- Decode the large model + ```bash + # Usage: speechlm/scripts/tune_speechlm_st/inference_large.sh [gen-set=dev] [beam_size=5] + bash speechlm/scripts/tune_speechlm_st/inference_large.sh $model_path $data_dir $lang dev + ``` + +## Universal Representation Evaluation on SUPERB + +Please refer to [**SUPERB**](https://superbbenchmark.org/) for the downstreaming tasks. + +## Pre-train +Please follow the instructions of [Tokenizer](README.md#Tokenizers) to prepare the pre-training data. We provided examples in [`dataset`](dataset). +- SpeechLM-P Base model + + Models will be stored in `$mount/pretrain`. 
+ ```bash + data_dir=dataset/LibriSpeech/phone_unit # should contain train_960.{tsv,phn} + text_data_dir=dataset/LibriLM/phone_unit/bin-idx # should contain train_text.phn-ltr.{phn,ltr}.{bin,idx} + # Usage: speechlm/scripts/pretrain_speechlm/base_speechlmp.sh [mount=$PWD] [world_size=32] [update_freq=1] + bash speechlm/scripts/pretrain_speechlm/base_speechlmp.sh $data_dir $text_data_dir + ``` +- SpeechLM-H Base model + ```bash + data_dir=dataset/LibriSpeech/hidden_unit # should contain train_960.{tsv,phn} + text_data_dir=dataset/LibriLM/km-ltr/bin-idx # should contain train_text.km-ltr.{km,ltr}.{bin,idx} + # Usage: speechlm/scripts/pretrain_speechlm/base_speechlmh.sh [mount=$PWD] [world_size=32] [update_freq=1] + bash speechlm/scripts/pretrain_speechlm/base_speechlmp.sh $data_dir $text_data_dir + ``` +- SpeechLM-P Large model + ```bash + data_dir=dataset/LibriSpeech/phone_unit # should contain train_960.{tsv,phn} + text_data_dir=dataset/LibriLM/phone_unit/bin-idx # should contain train_text.phn-ltr.{phn,ltr}.{bin,idx} + # Usage: speechlm/scripts/pretrain_speechlm/base_speechlmp.sh [mount=$PWD] [world_size=32] [update_freq=1] + bash speechlm/scripts/pretrain_speechlm/large_speechlmp.sh $data_dir $text_data_dir + ``` + + +## Tokenizers +### Phoneme-unit Tokenizer for Speech +This tokenizer is used to produce the frame-laigned phonemes for unlabeled speech, which is actually a hybrid HMM ASR model. + +In the Base setting, we use 100h LibriSpeech labeled data to train the HMM model under Kaldi recipe, then decode the unpaired speech and get the aligned phonemes from the lattice. +Here we provided the processed phonemes of 960h speech here: [`train_960.tsv`](https://drive.google.com/file/d/1rxlikMglL2kEsF4NfqekZRoA02klY7CE/view?usp=sharing), [`train_960.phn`](), [`dev_clean.tsv`](https://drive.google.com/file/d/1NuVwe687jLBFkDLRy1EV2A2uXyV_kBo2/view?usp=sharing), [`dev_clean.phn`](https://drive.google.com/file/d/1cq_gbS-UgCALOoaE5QmhWrhkTdXuc_Uc/view?usp=sharing). Note that the label-rate is 100 (10ms). + +> The phoneme inventory is 300+ word-position-dependent phones including silence phones. + +### Phoneme-unit Tokenizer for Text +This tokenizer is used to phonemize the unpaired text data to (phonemes, letters) paired data, following a `words -> phonemes -> upsampled phones` pipeline. + +The following script will download LibriSpeech LM corpus and produce the required data: `train_text.phn-ltr.phn.{idx,bin}` and `train_text.phn-ltr.ltr.{idx,bin}`. +> Before runing it, make sure you have our provided [`dict.phn.txt`](dataset/LibriLM/phone_unit/bin-idx/dict.phn.txt) and [`dict.ltr.txt`](dataset/LibriLM/phone_unit/bin-idx/dict.ltr.txt) in the output dir `dataset/LibriLM/phone_unit/bin-idx/`. + +> The phoneme inventory is 300+ word-position-dependent phones including silence phones. + +```bash +# data will be in dataset/LibriLM/phone_unit/ +bash speechlm/data_process/prepare_phn2ltr_librilm.sh +``` +### Hidden-unit Tokenizer for Speech +Please follow the steps of data preparation for HuBERT [here](https://github.com/facebookresearch/fairseq/tree/main/examples/hubert#data-preparation) to prepare 1) wav recordings [`train.tsv`](dataset/LibriSpeech/hidden_unit/train_sample100.tsv) and 2) corresponding hidden-units [`train.km`](dataset/LibriSpeech/hidden_unit/train_sample100.km), and 3) unit vocabulary [`dict.km.txt`](dataset/LibriSpeech/hidden_unit/dict.km.txt). + +### Hidden-unit Tokenizer for Text +This tokenizer is used to produce the speech-style hidden units from unpaired text. 
+We train a [FastSpeech](https://arxiv.org/abs/2006.04558)-like model (instead generating continuous spectrum in the original paper, here we generate discrete units) on a small amount of ASR data ([100 hrs LibriSpeech](http://www.openslr.org/12)) as the tokenizer. + +Train: +1. Convert asr transcripts to phoneme sequence with duration information. +2. Extract hidden-units from speech, using the [Hidden-unit Tokenizer for Speech](#hidden-unit-tokenizer-for-speech). +3. Train the [model](speechlm/models/fasttext2unit.py) on the paired data: + ```bash + data_dir=dataset/LibriSpeech/fast_phone2unit + bash speechlm/scripts/tokenizer_fastT2U/train_s_5e-4.sh $data_dir + ``` +> The phoneme inventory is 41 mono phones including silence phones. + +Inference: + +4. Convert text data to phoneme sequence by [`lexicon`](https://drive.google.com/file/d/1dh9NEx_cCF9_Aa0UcKyl9j00GXs6LmLQ/view?usp=sharing). +5. [Generate](speechlm/scripts/tokenizer_fastT2U/generate.sh) hidden units for a large text corpus: + ```bash + gen_set=dataset/LibriSpeech/fast_phone2unit/genset_examples + bash speechlm/scripts/tokenizer_fastT2U/generate.sh $model_path $gen_set + ``` +We provided train/generate data examples in [`dataset/LibriSpeech/fast_phone2unit`](dataset/LibriSpeech/fast_phone2unit), and the model checkpoint [here](https://drive.google.com/file/d/1e-aYf8hPXuly8DEvNg5SISOlcUxsgED0/view?usp=sharing). + +## License + +This project is licensed under the license found in the LICENSE file in the root directory of this source tree. +Portions of the source code are based on the [FAIRSEQ](https://github.com/pytorch/fairseq). + +[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct) + +## Reference + +If you find our work is useful in your research, please cite the following paper: + +```bibtex +@article{zhang2022speechlm, + title = {SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data}, + author = {Zhang, Ziqiang and Chen, Sanyuan and Zhou, Long and Wu, Yu and Ren, Shuo and Liu, Shujie and Yao, Zhuoyuan and Gong, Xun and Dai, Lirong and Li, Jinyu and Wei, Furu}, + eprint={2209.15329}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + year={2022} +} +``` + +### Contact Information + +For help or issues using SpeechLM models, please submit a GitHub issue. + +For other communications related to SpeechLM, please contact Long Zhou (`lozhou@microsoft.com`). 
+ diff --git a/SpeechT5/SpeechLM/SpeechLM.py b/SpeechT5/SpeechLM/SpeechLM.py new file mode 100644 index 0000000000000000000000000000000000000000..b242dde083e272f96e80791f13803c44b438991d --- /dev/null +++ b/SpeechT5/SpeechLM/SpeechLM.py @@ -0,0 +1,667 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import copy +import logging +from typing import Dict, List, Optional, Tuple + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from modules import ( + compute_mask_indices, + LayerNorm, + ConvFeatureExtractionModel, + GradMultiply, + TransformerEncoder, + TransformerEncoderBase, + +) + +# from fairseq.models.transformer import TransformerConfig + +logger = logging.getLogger(__name__) + +class DictConfig: + def __init__(self, cfg=None): + if cfg is not None: + self.update(cfg) + + def update(self, cfg: dict): + self.__dict__.update(cfg) + + +class TransformerConfig: + def __init__(self, cfg=None): + if cfg is not None: + self.update(cfg) + + def update(self, cfg: dict): + if 'encoder' in cfg: + self.encoder = DictConfig(cfg['encoder']) + del cfg['encoder'] + if 'quant_noise' in cfg: + self.quant_noise = DictConfig(cfg['quant_noise']) + del cfg['quant_noise'] + if 'decoder' in cfg: + del cfg['decoder'] + self.__dict__.update(cfg) + + +class SpeechLMConfig: + def __init__(self, cfg=None): + self.label_rate: int = 50 + self.extractor_mode: str = "default" # mode for feature extractor. default has a single group norm with d groups in the first conv block, whereas layer_norm has layer norms in every block (meant to use with normalize=True) + self.encoder_layers: int = 12 # num encoder layers in the transformer + self.encoder_embed_dim: int = 768 # encoder embedding dimension + self.encoder_embed_dim: int = 768 # encoder embedding dimension + self.encoder_ffn_embed_dim: int = 3072 # encoder embedding dimension for FFN + self.encoder_attention_heads: int = 12 # num encoder attention heads + self.activation_fn: str = "gelu" # activation function to use + self.layer_type: str = "transformer" # layer type in encoder + + # dropouts + self.dropout: float = 0.1 # dropout probability for the transformer + self.attention_dropout: float = 0.1 # dropout probability for attention weights + self.activation_dropout: float = 0.0 # dropout probability after activation in FFN + self.encoder_layerdrop: float = 0.0 # probability of dropping a tarnsformer layer + self.dropout_input: float = 0.0 # dropout to apply to the input (after feat extr) + self.dropout_features: float = 0.0 # dropout to apply to the features (after feat extr) + + self.final_dim: int = 256 # project final representations and targets to this many dimensions + self.layer_norm_first: bool = False # apply layernorm first in the transformer + self.conv_feature_layers: str = "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2" # string describing convolutional feature extraction layers in form of a python list that contains [(dim, kernel_size, stride), ...] 
+ self.conv_bias: bool = False # include bias in conv encoder + self.feature_grad_mult: float = 1.0 # multiply feature extractor var grads by this + + # masking + self.mask_length: int = 10 # mask length + self.mask_prob: float = 0.65 # probability of replacing a token with mask + self.mask_selection: str = "static" # how to choose mask length + self.mask_other: float = 0 # secondary mask argument (used for more complex distributions), see help in compute_mask_indicesh + self.no_mask_overlap: bool = False # whether to allow masks to overlap + self.mask_min_space: int = 1 # min space between spans (if no overlap is enabled) + + + # channel masking + self.mask_channel_length: int = 10 # length of the mask for features (channels) + self.mask_channel_prob: float = 0.0 # probability of replacing a feature with 0 + self.mask_channel_selection: str = "static" # how to choose mask length for channel masking + self.mask_channel_other: float = 0 # secondary mask argument (used for more complex distributions), see help in compute_mask_indices + self.no_mask_channel_overlap: bool = False # whether to allow channel masks to overlap + self.mask_channel_min_space: int = 1 # min space between spans (if no overlap is enabled) + + # positional embeddings + self.conv_pos: int = 128 # number of filters for convolutional positional embeddings + self.conv_pos_groups: int = 16 # number of groups for convolutional positional embedding + + # loss computation + self.skip_masked: bool = False # skip computing losses over masked frames + self.skip_nomask: bool = False # skip computing losses over unmasked frames + self.checkpoint_activations: bool = False # recompute activations and save memory for extra compute + + # FP16 optimization + self.required_seq_len_multiple: int = 2 # pad the input to encoder such that the sequence length is divisible by multiple + + # Custom + self.use_rel_pos_enc: bool = False # whether to use relative positional encoding + self.scaling_for_att: float = 1.0 # scaling for attention weights to prevent overflow issue (for large model) + + # unit encoder-decoder + self.add_unit_encoder: bool = False # add unit encoder + + # embedding mixing + self.mix_with_unit: bool = True # mix with the unit embeddings + self.use_pred_unit: bool = False # use the embeddings of predicted units + self.l2_embedding: bool = False # compute l2 loss between unit embedding and unit hidden state + + if cfg is not None: + self.update(cfg) + + def update(self, cfg: dict): + model_cfg = copy.deepcopy(cfg) + self.text_transformer = TransformerConfig(model_cfg['text_transformer']) + del model_cfg['text_transformer'] + self.__dict__.update(model_cfg) + +class SpeechLM(nn.Module): + def __init__( + self, + cfg: SpeechLMConfig, + ) -> None: + super().__init__() + self.cfg = cfg + + feature_enc_layers = eval(cfg.conv_feature_layers) # noqa + self.embed = feature_enc_layers[-1][0] + + self.feature_extractor = ConvFeatureExtractionModel( + conv_layers=feature_enc_layers, + dropout=0.0, + mode=cfg.extractor_mode, + conv_bias=cfg.conv_bias, + ) + sample_rate = 16000 + feature_ds_rate = np.prod([s for _, _, s in feature_enc_layers]) + self.feat2tar_ratio = cfg.label_rate * feature_ds_rate / sample_rate + + self.post_extract_proj = ( + nn.Linear(self.embed, cfg.encoder_embed_dim) + if self.embed != cfg.encoder_embed_dim + else None + ) + + self.mask_prob = cfg.mask_prob + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length = cfg.mask_length + self.no_mask_overlap = cfg.no_mask_overlap + 
self.mask_min_space = cfg.mask_min_space + + self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + self.logit_temp = cfg.logit_temp + self.skip_masked = cfg.skip_masked + self.skip_nomask = cfg.skip_nomask + + self.final_dim = cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim + self.final_proj_list = nn.ModuleList([ + nn.Linear(cfg.encoder_embed_dim, self.final_dim) for _ in range(2) + ]) + + self.mask_emb = nn.Parameter( + torch.FloatTensor(cfg.encoder_embed_dim).uniform_() + ) + + self.encoder = TransformerEncoder(cfg) + self.layer_norm = LayerNorm(self.embed) + + ### build unit encoder: + self.mask_u2t = cfg.mask_u2t + self.compute_mum = cfg.compute_mum + self.add_text_ctc = cfg.add_text_ctc + self.text_ctc_conv_kernel = cfg.text_ctc_conv_kernel + self.padding_idx = 1 + + self.add_unit_encoder = cfg.add_unit_encoder + self.mix_with_unit = cfg.mix_with_unit + self.use_pred_unit = cfg.use_pred_unit + self.l2_embedding = cfg.l2_embedding + if self.add_unit_encoder: + self.unit_embed_tokens = None + ### build unit encoder + self.unit_encoder = TransformerEncoderBase( + cfg.text_transformer, + dictionary=None, + embed_tokens=self.unit_embed_tokens, + use_rel_pos_enc=cfg.use_rel_pos_enc, + scaling_for_att=cfg.scaling_for_att, + ) + + ### build unit2text decoder, not available for now + self.add_decoder = cfg.add_decoder + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions.""" + + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + def apply_mask(self, x, padding_mask, target_list): + B, T, C = x.shape + if self.mask_prob > 0: + mask_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob, + self.mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x[mask_indices] = self.mask_emb + else: + mask_indices = None + + if self.mask_channel_prob > 0: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x[mask_channel_indices] = 0 + + return x, mask_indices + + def forward_features(self, source: torch.Tensor) -> torch.Tensor: + if self.feature_grad_mult > 0: + features = self.feature_extractor(source) + if self.feature_grad_mult != 1.0: + features = GradMultiply.apply(features, self.feature_grad_mult) + else: + with torch.no_grad(): + features = self.feature_extractor(source) + return features + + def forward_targets( + self, + features: torch.Tensor, + target_list: List[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Trim features to ensure labels exist and then get aligned labels + feat_tsz = features.size(2) + targ_tsz = min([t.size(1) for t in 
target_list]) + if self.feat2tar_ratio * feat_tsz > targ_tsz: + feat_tsz = int(targ_tsz / self.feat2tar_ratio) + features = features[..., :feat_tsz] + target_inds = torch.arange(feat_tsz).float() * self.feat2tar_ratio + target_inds += np.random.choice(int(self.feat2tar_ratio)) + target_list = [t[:, target_inds.long()] for t in target_list] + return features, target_list + + def forward_padding_mask( + self, + features: torch.Tensor, + padding_mask: torch.Tensor, + ) -> torch.Tensor: + extra = padding_mask.size(1) % features.size(1) + if extra > 0: + padding_mask = padding_mask[:, :-extra] + padding_mask = padding_mask.view(padding_mask.size(0), features.size(1), -1) + padding_mask = padding_mask.all(-1) + return padding_mask + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + lprobs = self.get_normalized_probs_scriptable(net_output, log_probs, sample) + lprobs.batch_first = True + return lprobs + + def downsample_ctc_padding_mask(self, padding_mask): + """ + padding_mask: (B, T) + """ + stride = self.text_ctc_conv_kernel // 2 + return padding_mask[:, ::stride] + + def compute_pred(self, proj_x, label_embs): + if self.target_glu: + label_embs = self.target_glu(label_embs) + x = F.normalize(proj_x.float(), dim=-1) # (S, D) + label_embs = F.normalize(label_embs.float(), dim=-1) # (C, D) + logits = torch.matmul(x, label_embs.T).type_as(proj_x) # (S, C) + logits /= self.logit_temp + return logits + + def compute_hubert_logits(self, x, target, proj, label_embs, padding_mask, mask_indices): + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + proj_x_m = proj(x[masked_indices]) + logit_m_list = [(self.compute_pred(proj_x_m, label_embs), target[masked_indices])] + else: + logit_m_list = [None] + + if not self.skip_nomask: + nomask_indices = torch.logical_and(~padding_mask, ~mask_indices) + proj_x_u = proj(x[nomask_indices]) + logit_u_list = [(self.compute_pred(proj_x_u, label_embs), target[nomask_indices])] + else: + logit_u_list = [None] + + return logit_m_list, logit_u_list + + def convert_embeddings(self, + x, + padding_mask, + target=None, + mask_indices=None, + mix_with_unit=False, + use_pred_unit=False, + l2_embedding=False, + remask=False + ): + """ + 1. Mix with units if needed (default: True) + 2. 
Prepare for unit_encoder inputs + Inputs: + x, (B, T, D) + Return: + src_tokens, (B, T) + soft_embeddings, (B, T, D) + l2_loss, a loss + """ + soft_embeddings = self.final_proj_list[0](x) if x.size(-1) == self.final_dim else x + if padding_mask is None: + padding_mask = soft_embeddings.new_zeros(soft_embeddings.size(0), soft_embeddings.size(1), dtype=torch.long) + if use_pred_unit: + src_tokens = self.compute_pred(self.final_proj_list[0](x), self.label_embs_list[0]).argmax(dim=-1) + src_tokens[padding_mask] = self.padding_idx + elif target is not None: + src_tokens = target + else: + src_tokens = padding_mask.long() + + if l2_embedding | mix_with_unit: + unit_embeddings = self.unit_embed_tokens(src_tokens) # (B, T, D) + + l2_loss = 0 + if l2_embedding: + if mask_indices is not None: + l2_loss = (soft_embeddings - unit_embeddings)[mask_indices].float().pow(2).mean(dim=-1) + scale = unit_embeddings[mask_indices].float().pow(2).sum(dim=-1) + else: + l2_loss = (soft_embeddings - unit_embeddings).float().pow(2).mean(dim=-1) + scale = unit_embeddings.float().pow(2).sum(dim=-1) + l2_loss = (l2_loss / scale).mean() + + if mix_with_unit: + B, T, D = x.shape + selected_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob / 2, + self.mask_length // 2, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + selected_indices = torch.from_numpy(selected_indices).to(x.device) + if mask_indices is not None: + if remask: + remask_indices = torch.logical_and(selected_indices, mask_indices) + soft_embeddings[remask_indices] = self.mask_emb + swap_indices = torch.logical_and(selected_indices, ~mask_indices) + else: + swap_indices = selected_indices + soft_embeddings[swap_indices] = unit_embeddings[swap_indices] + + soft_embeddings = soft_embeddings * (1 - padding_mask.unsqueeze(-1).type_as(x)) + return src_tokens, soft_embeddings, l2_loss + + def forward( + self, + source: torch.Tensor = None, + src_tokens: torch.Tensor = None, + src_lengths: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + assert source is not None or src_tokens is not None + if source is not None: + return self.forward_speech( + source=source, + target_list=target_list, + padding_mask=padding_mask, + mask=mask, + features_only=features_only, + output_layer=output_layer, + ) + else: + return self.forward_text( + src_tokens=src_tokens, + src_lengths=src_lengths, + mask=self.mask_u2t, + output_layer=output_layer, + ) + + def forward_speech( + self, + source: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + """output layer is 1-based""" + features = self.forward_features(source) + if target_list is not None: + features, target_list = self.forward_targets(features, target_list) + + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) + features = self.layer_norm(features) + unmasked_features = features.clone() + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = 
self.dropout_input(features) + unmasked_features = self.dropout_features(unmasked_features) + + if mask: + x, mask_indices = self.apply_mask(features, padding_mask, target_list) + else: + x = features + mask_indices = None + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, layer_results = self.encoder( + x, + padding_mask=padding_mask, + layer=None if output_layer is None else output_layer - 1, + ) + + if features_only: + return {"x": x, "padding_mask": padding_mask, "features": features, "layer_results": layer_results} + + logit_m_list, logit_u_list = self.compute_hubert_logits( + x, + target_list[0], + self.final_proj_list[0], + self.label_embs_list[0], + padding_mask, + mask_indices, + ) + + result = { + "logit_m_list": logit_m_list, + "logit_u_list": logit_u_list, + "padding_mask": padding_mask, + "features_pen": features_pen, + } + + if self.add_unit_encoder: + src_tokens, x_emb, l2_loss = self.convert_embeddings( + x, + padding_mask, target_list[0], + mask_indices=mask_indices, + mix_with_unit=self.mix_with_unit, + use_pred_unit=self.use_pred_unit, + l2_embedding=self.l2_embedding, + ) + encoder_out = self.unit_encoder(src_tokens, token_embeddings=x_emb) + + result['encoder_out'] = encoder_out['encoder_out'] # [(T, B, D)] + result['encoder_padding_mask'] = encoder_out['encoder_padding_mask'] # [(B, T)] + if self.l2_embedding: + result['embedding_l2_loss'] = l2_loss + + code_logit_m_list, code_logit_u_list = self.compute_hubert_logits( + encoder_out['encoder_out'][0].transpose(0, 1), + target_list[-1], + self.final_proj_list[-1], + self.label_embs_list[-1], + padding_mask, + mask_indices, + ) + result['logit_m_list'] += code_logit_m_list + result['logit_u_list'] += code_logit_u_list + return result + + def forward_text( + self, + src_tokens: torch.Tensor = None, + src_lengths: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + mask: bool = True, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + assert self.add_unit_encoder, f"Can not forward unit-text branch without unit_encoder!" 
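+ # Unit-to-text branch: embed the discrete unit tokens, mask them (via apply_mask when
+ # mask=True, otherwise at the positions already marked in target_list), run the shared
+ # unit encoder, then optionally compute masked-unit-prediction logits (compute_mum)
+ # and/or character-level CTC logits (add_text_ctc).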
+ + padding_mask = src_tokens == self.padding_idx + unit_embeddings = self.unit_embed_tokens(src_tokens) + if mask: + unit_embeddings, mask_indices = self.apply_mask(unit_embeddings, padding_mask, [src_tokens]) + else: + ### If already applied mask on src_tokens, then the target_list should contains many padding_idx + mask_indices = target_list[-1] != self.padding_idx + unit_embeddings[mask_indices] = self.mask_emb + + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=unit_embeddings, + return_all_hiddens=output_layer is not None, + ) + + result = {} + result["encoder_out"] = encoder_out["encoder_out"] + result["encoder_states"] = encoder_out["encoder_states"] + result["padding_mask"] = padding_mask + + if self.compute_mum: + code_logit_m_list, code_logit_u_list = self.compute_hubert_logits( + encoder_out["encoder_out"].transpose(0, 1), + target_list[-1], + self.final_proj_list[-1], + self.label_embs_list[-1], + padding_mask, + mask_indices, + ) + result["logit_m_list"] = code_logit_m_list + result["logit_u_list"] = code_logit_u_list + + if self.add_text_ctc: + result["encoder_out_ctc"] = [self.unit_encoder_ctc_head(x) for x in encoder_out['encoder_out']] + result["encoder_padding_mask"] = [ + self.downsample_ctc_padding_mask(padding_mask) for padding_mask in encoder_out['encoder_padding_mask'] + ] + return result + + def extract_features( + self, + source: torch.Tensor, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = False, + ret_conv: bool = False, + output_layer: Optional[int] = None, + ret_layer_results: bool = False, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Extract features for only speech input""" + with torch.no_grad(): + res = self.forward( + source, + padding_mask=padding_mask, + mask=mask, + features_only=True, + output_layer=output_layer, + ) + # {"x": x, "padding_mask": padding_mask, "features": features, "layer_results": layer_results} + + x = res["x"] # B x T x D + padding_mask = res["padding_mask"] + if self.add_unit_encoder and (output_layer is None or output_layer > self.cfg.encoder_layers): + src_tokens, x, _ = self.convert_embeddings( + x, + padding_mask, + mix_with_unit=False, + use_pred_unit=False, + ) + return_all_hiddens=output_layer is not None and output_layer > self.cfg.encoder_layers + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=x, + return_all_hiddens=return_all_hiddens, + ) + res["x"] = encoder_out['encoder_out'][0].transpose(0, 1) # (B, T, D) + if return_all_hiddens: + res["layer_results"] += encoder_out['encoder_states'][1:1+output_layer-len(res["layer_results"])] + + feature = res["features"] if ret_conv else res["x"] + if ret_layer_results: + feature = (feature, res["layer_results"]) + + return feature, padding_mask + + def get_logits(self, net_output, is_masked=True): + if is_masked: + logits_list = net_output["logit_m_list"] + else: + logits_list = net_output["logit_u_list"] + logits_list = [x[0].float() for x in logits_list if x is not None] + return logits_list + + def get_targets(self, net_output, is_masked=True): + if is_masked: + logits_list = net_output["logit_m_list"] + else: + logits_list = net_output["logit_u_list"] + targets_list = [x[1].long() for x in logits_list if x is not None] + return targets_list + + def get_extra_losses(self, net_output): + extra_losses = [] + names = [] + + if "features_pen" in net_output: + extra_losses.append(net_output["features_pen"]) + names.append("features_pen") + + if "embedding_l2_loss" in net_output: + 
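+ # "embedding_l2_loss" is only present when l2_embedding is enabled on the unit-encoder branch.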
extra_losses.append(net_output["embedding_l2_loss"]) + names.append("embedding_l2_loss") + + return extra_losses, names + + def remove_pretraining_modules(self, step2=False): + self.target_glu = None + diff --git a/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/config_base_ende.yaml b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/config_base_ende.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50733b2740c6f02f3adfc1d536a3a4005ffa7d6a --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/config_base_ende.yaml @@ -0,0 +1,14 @@ +bpe_tokenizer: + bpe: sentencepiece + sentencepiece_model: spm_char_st_en_de.model + +shuffle: false +use_audio_input: true +use_sample_rate: 16000 +standardize_audio: false +vocab_filename: spm_char_st_en_de.txt + +# required by speech_to_text task but never used +input_channels: 1 +input_feat_per_channel: 1 + diff --git a/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/config_large_ende.yaml b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/config_large_ende.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3424a3c55f0e48e8197d98cd3e724baa08c834f --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/config_large_ende.yaml @@ -0,0 +1,14 @@ +bpe_tokenizer: + bpe: sentencepiece + sentencepiece_model: spm_char_st_en_de.model + +shuffle: false +use_audio_input: true +use_sample_rate: 16000 +standardize_audio: true +vocab_filename: spm_char_st_en_de.txt + +# required by speech_to_text task but never used +input_channels: 1 +input_feat_per_channel: 1 + diff --git a/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/dev-sample100_st_en_de_local.tsv b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/dev-sample100_st_en_de_local.tsv new file mode 100644 index 0000000000000000000000000000000000000000..c4251fa8a24f33e2ebd44ad90899c0778e24aaf8 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/dev-sample100_st_en_de_local.tsv @@ -0,0 +1,100 @@ +id audio n_frames tgt_text +common_voice_en_18540003 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18540003.wav 90624 Wenn Wasser knapp ist, verschwenden Sie es nicht. +common_voice_en_18540005 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18540005.wav 57984 Du fährst mit ihr bis zu ihrer Tür. +common_voice_en_18540006 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18540006.wav 63744 Celia schreckte zurück und zitterte. +common_voice_en_65557 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_65557.wav 40704 Haben Sie einen Ring? +common_voice_en_65559 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_65559.wav 44160 Ich habe ihn nicht einmal gefragt. +common_voice_en_19594267 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19594267.wav 110208 Der größte See nach Fläche in der Mongolei, der Uvs-See, ist in der Great Lakes Depression. +common_voice_en_19594268 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19594268.wav 91392 Die darauffolgende Wiedervereinigung mit Rom hat bis heute ununterbrochen angedauert. +common_voice_en_19594269 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19594269.wav 64128 Die Saiten könnten aus Messing oder Stahl sein. +common_voice_en_18282099 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18282099.wav 67584 Andrew rollte sich in der Box zusammen. +common_voice_en_2518264 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_2518264.wav 61824 Säure ätzt Locher in Wollstoff. 
+common_voice_en_18909686 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18909686.wav 147072 Dies wurde später von Herny Seebohm beschrieben und Riesen-Fischuhu genannt. +common_voice_en_18909688 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18909688.wav 114048 Er ist auch dazu in der Lage, über kurze Distanzen zu schweben. +common_voice_en_18909689 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18909689.wav 85248 So konnte Letta seine große Koalition fortsetzen. +common_voice_en_18460666 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18460666.wav 56064 Es nicht gekostet wegschieben? +common_voice_en_18460690 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18460690.wav 68736 Ich bin verzweifelt, und damit hatte es sich. +common_voice_en_18460692 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18460692.wav 54912 Ich folge dir nicht, Jeeves. +common_voice_en_485640 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_485640.wav 70272 Ordentliche Pläne scheitern ohne Glück. +common_voice_en_89833 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_89833.wav 128256 Das ist ein super Armband, das du trägst. +common_voice_en_19001715 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19001715.wav 148224 Der Buddhismus in Afghanistan wurde von den Saffariden, Ghaznawiden und Ghuriden erfolgreich beseitigt. +common_voice_en_19001716 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19001716.wav 80256 Das System sieht einen frei schwebenden Lauf vor. +common_voice_en_19001719 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19001719.wav 109056 Diese bekannten Murderabilia-Händler finden Sie auf den folgenden Websites. +common_voice_en_9774 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_9774.wav 119040 Sie liest unheimlich gern, weiß jedoch nicht so genau, wie das Lesen zu einer Steigerung der Kreativität beitragen kann. +common_voice_en_26370 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_26370.wav 62208 Danke, dass Sie uns an Ihrer Geschichte haben teilhaben lassen. Alles Gute für die Hochzeitsreise. +common_voice_en_26372 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_26372.wav 59904 Sie kennen die Uhrzeit doch. Warum fragen Sie mich danach? +common_voice_en_17260994 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_17260994.wav 155520 Der Fuchs sprang über den Rand der Farm. Dort fand er einen Safari-Reisenden vor, der eine Vivaldi Opera zum Besten gab. +common_voice_en_18881599 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18881599.wav 108672 "Express" sollte das Gebiet untersuchen und fand dort nichts. +common_voice_en_18881604 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18881604.wav 92544 Dadurch werden die Probleme gemildert, die durch einen Mangel an Hämoglobin verursacht werden. +common_voice_en_18881605 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18881605.wav 109056 Diese Behauptungen werden von der Mainstream-Archäologie kategorisch zurückgewiesen. +common_voice_en_180278 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_180278.wav 48768 Sie sollte eigentlich herunterkommen und Sie abholen. +common_voice_en_180279 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_180279.wav 54912 Ich werde dort nicht als Geist leben. +common_voice_en_696251 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_696251.wav 98304 Der Junge hat bemerkt, dass der Engländer nervös war und seine Bücher vergessen hat. 
+common_voice_en_19049974 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19049974.wav 73344 Durch eine Augenverletzung fand seine Karriere ein vorzeitiges Ende. +common_voice_en_19049975 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19049975.wav 126336 Supermatrixes ähnlicher Größe können genauso wie normale Matrixes hinzugefügt und vervielfacht werden. +common_voice_en_19049976 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19049976.wav 94464 Es liegt annäherungsweise südlich von Denali, der höchsten Erhebung in Nordamerika. +common_voice_en_19765134 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19765134.wav 110208 Kleinstädte in Vietnam unterstehen der regionalen Regierung. +common_voice_en_19765136 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19765136.wav 61440 Fünf Jahre später nahm er ihn nach Dresden mit. +common_voice_en_19765138 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19765138.wav 130176 Der Croma ist standardmäßig mit Anti-Blockier-System (ABS) und Elektronischer Bremskraftverteilung (EBD) ausgestattet. +common_voice_en_19688061 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19688061.wav 104448 Carter hat zwei Kinder, die Tochter Taleya und den Sohn Tamere. +common_voice_en_19688062 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19688062.wav 148992 Wenn der Gehalt an gelöstem Sauerstoff zu hypoxischen Bedingungen übergeht, ersticken Fische und andere Meerestiere. +common_voice_en_19688064 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19688064.wav 48768 Adams hatte ein Leben mit vielen Tiefen. +common_voice_en_19690060 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19690060.wav 136320 Er hat die Dudley Middle Comprehensive School besucht. +common_voice_en_19690063 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19690063.wav 116352 Der ursprüngliche Name der Schule lautet "School of Commerce and Domestic Science" (Handels- und Hauswirtschaftsschule). +common_voice_en_19690064 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19690064.wav 124032 Bei dem Unfall, bei dem er am Steuer saß, befand sich auch Anna, seine Tochter, im Auto. Sie hat den Unfall überlebt. +common_voice_en_18260377 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18260377.wav 98304 Jeder möchte gemocht werden. Das liegt in der Natur des Menschen. +common_voice_en_18260378 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18260378.wav 85248 Jeder sollte Zugang zu medizinischer Grundversorgung haben. +common_voice_en_18260379 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18260379.wav 77184 Während wir älter werden, sind wir in unserem Leben gefangen. +common_voice_en_100764 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_100764.wav 73344 Sie sollten das in einem Wahrnehmungsexperiment untersuchen. +common_voice_en_100765 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_100765.wav 70656 Sie haben mich vom ersten Moment an abgelehnt. +common_voice_en_626029 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_626029.wav 104448 Hanf ist ein Gras, das in Teilen der Tropen vorgefunden wird. +common_voice_en_19703984 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19703984.wav 142848 Sowohl Federation als auch Neo-Zeon Forces sehen dabei zu als die Axis beim Wiedereintritt von der Bahn abkommen. 
+common_voice_en_19703985 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19703985.wav 165120 Das Mutterhaus in Loretto befindet sich in Nerinx, Marion County, Kentucky. +common_voice_en_19703987 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19703987.wav 114048 Der Umfang von Matildas militärischer Ausbildung wird diskutiert. +common_voice_en_19676540 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19676540.wav 105984 Zu Lernzwecken wurden Stifte nach und nach durch Schreibtafeln ersetzt. +common_voice_en_19676541 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19676541.wav 131712 Die extrem hügelige Landschaft zeichnet sich durch eine Art Erhabenheit aus und bietet einen atemberaubenden Ausblick. +common_voice_en_19676542 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19676542.wav 93696 Die beiden Tierbilder wurden zu einem Bild kombiniert. +common_voice_en_19678470 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19678470.wav 145920 Sie und Gatti-Casazza haben sich im darauffolgenden Jahr getrennt und sich dann scheiden lassen. +common_voice_en_19678471 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19678471.wav 74112 Es zeigt allerdings niemand Interesse. +common_voice_en_19678476 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19678476.wav 98688 Er hat keine sinnvollen Aussagen gemacht. Es war nur Kauderwelsch. +common_voice_en_17730605 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_17730605.wav 57984 Wer im Glashaus sitzt, sollte nicht mit Steinen werfen. +common_voice_en_19768089 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19768089.wav 66432 Der Rahmen kippt den Motor leicht nach hinten. +common_voice_en_19768197 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19768197.wav 58752 Bevor er hauptberuflich Politiker wurde, war er Landwirt. +common_voice_en_19768200 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19768200.wav 73344 Er hat auch als Karikaturist und Comiczeichner gearbeitet. +common_voice_en_19699188 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19699188.wav 106368 Das Schiff war zwei von vier Lebensjahren aufgelegt. +common_voice_en_19699189 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19699189.wav 133632 Boucher hat sich von Künstlern wie Peter Pauls Rubens und Antoine Watteau inspirieren lassen. +common_voice_en_19699190 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19699190.wav 108288 Zwei Tracks wurden als Auszüge auf einer Single herausgebracht. +common_voice_en_512711 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_512711.wav 84096 Gruppe von Menschen, von sanftem Licht einer Öllaterne angestrahlt. +common_voice_en_512712 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_512712.wav 103296 Frau mit hellem Haar und Mann mit einem Lächeln, die nebeneinander sitzen. +common_voice_en_512713 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_512713.wav 98304 Ein Mann fährt die Straße entlang und passiert Blumenkübel. Er hält dabei ein zweites Fahrrad. +common_voice_en_19678686 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19678686.wav 114816 Computertische werden normalerweise in der Massenproduktion gefertigt und müssen teilweise in Selbstmontage montiert werden. +common_voice_en_19678689 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19678689.wav 97536 Aufgrund der geringen Auflage gilt es jetzt als Sammlerstück. 
+common_voice_en_19678692 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19678692.wav 139776 Die Songs von Thrussel haben regelmäßig Themen zum Gegenstand, in denen er sich gegen Konsum und Überwachung durch den Staat ausspricht. +common_voice_en_648128 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_648128.wav 77184 Ein Mann und ein Kind auf einem Campingplatz, die ein Frühstück zubereiten. +common_voice_en_648129 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_648129.wav 72576 Militärangehörige bereiten sich auf ihren Dienst vor. +common_voice_en_648130 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_648130.wav 109824 Ein Baseballspieler, der ein blaues T-Shirt trägt, läuft auf eine Base zu. +common_voice_en_34182 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_34182.wav 82560 Ihr Büro hat mich angerufen, um ihn zurückzuhalten. +common_voice_en_34184 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_34184.wav 67968 Dieser Satz macht überhaupt keinen Sinn. +common_voice_en_92676 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_92676.wav 62976 Eine Gruppe von Leuten läuft durch eine Karnevalsgruppe. +common_voice_en_92677 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_92677.wav 62976 Ein älteres Paar, das singt und Gitarre spielt. +common_voice_en_92678 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_92678.wav 86016 Zwei Männer in roten Hosen vollführen akrobatische Kunststücke mit einer Leiter. +common_voice_en_570502 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_570502.wav 82944 Künstliche neuronale Netzwerke können etwas ganz ähnliches ausführen. +common_voice_en_141246 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_141246.wav 63744 Schalte die Laterne aus, die uns Licht spendet. +common_voice_en_141247 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_141247.wav 62592 Brian reist heute ab. +common_voice_en_19047441 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19047441.wav 84096 Die Bewohner haben im Namen des Dauphin eine Silbermine betrieben. +common_voice_en_19047442 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19047442.wav 74496 Die Statue wurde durch den Millenium Lottery Fund teilfinanziert. +common_voice_en_19047443 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19047443.wav 117504 Das Henderson House in Elmhurst, Illinois, USA; hat einen ähnlichen Grundriss. +common_voice_en_567705 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_567705.wav 54144 Hängen Sie an beide Zweige Lametta. +common_voice_en_17283658 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_17283658.wav 59520 Unter den Linden. +common_voice_en_17283659 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_17283659.wav 119040 Das höchste Gebäude der Welt ist 829,8 m hoch. +common_voice_en_18707930 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18707930.wav 107136 Die Stadt liegt in Harris County, in Südost Texas. +common_voice_en_18707931 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18707931.wav 155904 Das steht im Gegensatz zum Potential des Pacemakers oder dem Strom, der die rhythmische Modulierung der Impulsfrequenz antreibt. +common_voice_en_18707933 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18707933.wav 90624 Die Stadt wird durch eine Stadtverwaltung regiert. 
+common_voice_en_18524588 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18524588.wav 88704 Genehmigen Sie den Ausdruck meiner vorzüglichen Hochachtung. +common_voice_en_18524590 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18524590.wav 67584 Anhand der Laufzeit kann man ablesen, dass dieser Computer nie neu gestartet wurde. +common_voice_en_18524592 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_18524592.wav 89856 Celia stand dort, war offenbar nicht betroffen und konnte den Vorkommnissen nicht folgen. +common_voice_en_19254317 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19254317.wav 134784 Der Unterricht wird extra abends abgehalten, damit die Studenten von High Schools daran teilnehmen können. +common_voice_en_19254318 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19254318.wav 119424 Dieses Fett ist Halacha und wird auch Chelev oder Talg genannt. +common_voice_en_19254320 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_19254320.wav 97536 Die Patienten und das Krankenhauspersonal haben sie für den Preis nominiert. +common_voice_en_542826 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_542826.wav 117120 Jeder Teilbereich des Bildschirms gehört zu einer bestimmten Reihe und Spalte. +common_voice_en_542828 /LocalData/dataset/CommonVoice/v4/en/wav/common_voice_en_542828.wav 108672 Die internationale Raumstation ist ein großartiges Projekt. diff --git a/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/spm_char_st_en_de.model b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/spm_char_st_en_de.model new file mode 100644 index 0000000000000000000000000000000000000000..b9418a61f6cb120e16d0b64c67886203b5e95da2 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/spm_char_st_en_de.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a0b188591cd0d1e9d713fe1f3a9cfbe23a72b6bf73346ba11a2a70ab1a3a025 +size 239480 diff --git a/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/spm_char_st_en_de.txt b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/spm_char_st_en_de.txt new file mode 100644 index 0000000000000000000000000000000000000000..1a1a2f6420331fb0efef9fe87631b10fa493dba7 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/spm_char_st_en_de.txt @@ -0,0 +1,164 @@ +▁ 1 +e 1 +n 1 +i 1 +r 1 +t 1 +s 1 +a 1 +d 1 +h 1 +u 1 +l 1 +o 1 +c 1 +g 1 +m 1 +. 1 +b 1 +f 1 +w 1 +k 1 +z 1 +S 1 +v 1 +p 1 +, 1 +D 1 +ü 1 +E 1 +ä 1 +A 1 +B 1 +M 1 +G 1 +" 1 +F 1 +K 1 +P 1 +W 1 +T 1 +y 1 +H 1 +ö 1 +I 1 +R 1 +L 1 +- 1 +C 1 +V 1 +N 1 +ß 1 +Z 1 +J 1 +U 1 +j 1 +O 1 +x 1 +? 1 +! 
1 +' 1 +q 1 +Y 1 +Ü 1 +: 1 +Q 1 +Ä 1 +Ö 1 +; 1 +( 1 +) 1 +X 1 +0 1 +1 1 +[ 1 +] 1 +é 1 +2 1 +& 1 +3 1 +5 1 +4 1 +7 1 +9 1 +8 1 +6 1 +/ 1 +á 1 +ō 1 +ó 1 +ñ 1 +ú 1 +í 1 +ā 1 +è 1 +* 1 +ć 1 +à 1 +ê 1 +ë 1 +¡ 1 +ç 1 +ð 1 +ã 1 +č 1 +ū 1 +% 1 +É 1 +â 1 +ø 1 +š 1 +å 1 +ô 1 +ł 1 +œ 1 +ş 1 +Š 1 +_ 1 +Î 1 +Ó 1 +æ 1 +ï 1 +ă 1 +ě 1 +ī 1 +ı 1 +ʻ 1 +ʿ 1 +π 1 +и 1 +к 1 += 1 +à 1 +Ø 1 +î 1 +û 1 +þ 1 +ċ 1 +Č 1 +ę 1 +ğ 1 +ń 1 +Ō 1 +ő 1 +ř 1 +ž 1 +ǎ 1 +α 1 +В 1 +е 1 +з 1 +й 1 +л 1 +н 1 +ь 1 +я 1 +ṃ 1 +ạ 1 +ụ 1 +→ 1 +≡ 1 +京 1 +大 1 +都 1 +阪 1 diff --git a/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/spm_char_st_en_de.vocab b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/spm_char_st_en_de.vocab new file mode 100644 index 0000000000000000000000000000000000000000..dcaf02c4610abddbef943bb81b8df7807ca6d7ca --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/CommonVoice/v4/en/en-de/spm_char_st_en_de.vocab @@ -0,0 +1,168 @@ + 0 + 0 + 0 + 0 +▁ -1.94346 +e -2.0247 +n -2.52771 +i -2.69095 +r -2.81179 +t -2.99429 +s -3.07457 +a -3.08727 +d -3.37853 +h -3.41543 +u -3.52845 +l -3.53925 +o -3.76429 +c -3.83672 +g -3.89086 +m -4.03425 +. -4.27171 +b -4.34078 +f -4.45167 +w -4.51255 +k -4.68054 +z -4.81542 +S -4.96966 +v -5.01738 +p -5.09819 +, -5.11371 +D -5.22687 +ü -5.34517 +E -5.43072 +ä -5.43483 +A -5.61389 +B -5.67037 +M -5.68285 +G -5.93387 +" -5.94796 +F -5.95252 +K -5.99114 +P -6.03568 +W -6.0592 +T -6.08128 +y -6.08834 +H -6.14664 +ö -6.17763 +I -6.18576 +R -6.22513 +L -6.30172 +- -6.34074 +C -6.41901 +V -6.44441 +N -6.48507 +ß -6.60475 +Z -6.78851 +J -6.81489 +U -7.04154 +j -7.07161 +O -7.13538 +x -7.50985 +? -7.66957 +! -8.34983 +' -8.62779 +q -8.7511 +Y -8.80869 +Ü -9.0344 +: -9.03696 +Q -9.11993 +Ä -9.61997 +Ö -9.9612 +; -10.0729 +( -10.0826 +) -10.0839 +X -10.6277 +0 -11.1096 +1 -11.1164 +[ -11.296 +] -11.296 +é -11.3293 +2 -11.4413 +& -12.1488 +3 -12.188 +5 -12.3864 +4 -12.4237 +7 -12.4891 +9 -12.6035 +8 -12.6343 +6 -12.666 +/ -12.9645 +á -13.1043 +ō -13.392 +ó -13.5351 +ñ -13.6151 +ú -13.9028 +í -14.1541 +ā -14.1541 +è -14.2282 +* -14.3953 +ć -14.7137 +à -14.8472 +ê -14.8472 +ë -14.8472 +¡ -15.0014 +ç -15.0014 +ð -15.0014 +ã -15.1837 +č -15.1837 +ū -15.1837 +% -15.4069 +É -15.4069 +â -15.4069 +ø -15.4069 +š -15.4069 +å -15.6945 +ô -15.6945 +ł -15.6945 +œ -15.6945 +ş -15.6945 +Š -15.6945 +_ -16.1 +Î -16.1 +Ó -16.1 +æ -16.1 +ï -16.1 +ă -16.1 +ě -16.1 +ī -16.1 +ı -16.1 +ʻ -16.1 +ʿ -16.1 +π -16.1 +и -16.1 +к -16.1 += -16.7932 +à -16.7932 +Ø -16.7932 +î -16.7932 +û -16.7932 +þ -16.7932 +ċ -16.7932 +Č -16.7932 +ę -16.7932 +ğ -16.7932 +ń -16.7932 +Ō -16.7932 +ő -16.7932 +ř -16.7932 +ž -16.7932 +ǎ -16.7932 +α -16.7932 +В -16.7932 +е -16.7932 +з -16.7932 +й -16.7932 +л -16.7932 +н -16.7932 +ь -16.7932 +я -16.7932 +ṃ -16.7932 +ạ -16.7932 +ụ -16.7932 +→ -16.7932 +≡ -16.7932 +京 -16.7932 +大 -16.7932 +都 -16.7932 +阪 -16.7932 diff --git a/SpeechT5/SpeechLM/dataset/LibriLM/hidden_unit/bin-idx/config.yaml b/SpeechT5/SpeechLM/dataset/LibriLM/hidden_unit/bin-idx/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97f25d9780d99813e322fbbf24c5b916525ede94 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriLM/hidden_unit/bin-idx/config.yaml @@ -0,0 +1,3 @@ +vocab_filename: dict.ltr.txt +src_vocab_filename: dict.km.txt + diff --git a/SpeechT5/SpeechLM/dataset/LibriLM/hidden_unit/bin-idx/dict.km.txt b/SpeechT5/SpeechLM/dataset/LibriLM/hidden_unit/bin-idx/dict.km.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ 
b/SpeechT5/SpeechLM/dataset/LibriLM/hidden_unit/bin-idx/dict.km.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 
409 +410 410 +411 411 +412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 +447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 +487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git a/SpeechT5/SpeechLM/dataset/LibriLM/hidden_unit/bin-idx/dict.ltr.txt b/SpeechT5/SpeechLM/dataset/LibriLM/hidden_unit/bin-idx/dict.ltr.txt new file mode 100644 index 0000000000000000000000000000000000000000..26a7e6ba309998c3868db7ecab5d7afa52a68e52 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriLM/hidden_unit/bin-idx/dict.ltr.txt @@ -0,0 +1,29 @@ +| 803288730 +E 439294199 +T 319071758 +A 277306732 +O 263784364 +N 239361162 +I 237353011 +H 223346762 +S 220175453 +R 203352500 +D 152198685 +L 141597450 +U 98913389 +M 87138757 +C 84680142 +W 81375101 +F 80240665 +G 70642902 +Y 68388038 +P 58436929 +B 52538531 +V 33250231 +K 26906609 +' 9162896 +X 5075632 +J 4746771 +Q 3401794 +Z 2186971 + 1 diff --git a/SpeechT5/SpeechLM/dataset/LibriLM/phone_unit/bin-idx/config.yaml b/SpeechT5/SpeechLM/dataset/LibriLM/phone_unit/bin-idx/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6fd3d8c13f92f3ef5796e4c93adb4fe3161a38b --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriLM/phone_unit/bin-idx/config.yaml @@ -0,0 +1,3 @@ +vocab_filename: dict.ltr.txt +src_vocab_filename: dict.phn.txt + diff --git a/SpeechT5/SpeechLM/dataset/LibriLM/phone_unit/bin-idx/dict.ltr.txt b/SpeechT5/SpeechLM/dataset/LibriLM/phone_unit/bin-idx/dict.ltr.txt new file mode 100644 index 0000000000000000000000000000000000000000..26a7e6ba309998c3868db7ecab5d7afa52a68e52 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriLM/phone_unit/bin-idx/dict.ltr.txt @@ -0,0 +1,29 @@ +| 803288730 +E 439294199 +T 319071758 +A 277306732 +O 263784364 +N 239361162 +I 237353011 +H 223346762 +S 220175453 +R 203352500 +D 152198685 +L 141597450 +U 98913389 +M 87138757 +C 84680142 +W 81375101 +F 80240665 +G 70642902 +Y 68388038 +P 58436929 +B 52538531 +V 33250231 +K 26906609 +' 9162896 +X 5075632 +J 4746771 +Q 3401794 +Z 2186971 + 1 diff --git a/SpeechT5/SpeechLM/dataset/LibriLM/phone_unit/bin-idx/dict.phn.txt b/SpeechT5/SpeechLM/dataset/LibriLM/phone_unit/bin-idx/dict.phn.txt new file mode 100644 index 0000000000000000000000000000000000000000..812e4b06e13b30fda420034927f6f877e2d54f56 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriLM/phone_unit/bin-idx/dict.phn.txt @@ -0,0 +1,364 @@ + 0 +SIL 1 +SIL_B 2 +SIL_E 3 +SIL_I 4 +SIL_S 5 +SPN 6 +SPN_B 7 +SPN_E 8 +SPN_I 9 +SPN_S 10 +AA_B 11 +AA_E 12 +AA_I 13 +AA_S 14 +AA0_B 15 +AA0_E 16 +AA0_I 17 +AA0_S 18 +AA1_B 19 +AA1_E 20 +AA1_I 21 +AA1_S 22 +AA2_B 23 +AA2_E 24 +AA2_I 25 +AA2_S 26 +AE_B 27 +AE_E 28 +AE_I 29 +AE_S 30 +AE0_B 31 +AE0_E 32 +AE0_I 33 +AE0_S 34 +AE1_B 35 +AE1_E 36 +AE1_I 37 +AE1_S 38 +AE2_B 39 +AE2_E 40 +AE2_I 41 +AE2_S 42 +AH_B 43 +AH_E 44 +AH_I 45 +AH_S 46 +AH0_B 47 +AH0_E 48 +AH0_I 49 +AH0_S 50 +AH1_B 51 +AH1_E 52 +AH1_I 53 +AH1_S 54 +AH2_B 55 +AH2_E 56 
+AH2_I 57 +AH2_S 58 +AO_B 59 +AO_E 60 +AO_I 61 +AO_S 62 +AO0_B 63 +AO0_E 64 +AO0_I 65 +AO0_S 66 +AO1_B 67 +AO1_E 68 +AO1_I 69 +AO1_S 70 +AO2_B 71 +AO2_E 72 +AO2_I 73 +AO2_S 74 +AW_B 75 +AW_E 76 +AW_I 77 +AW_S 78 +AW0_B 79 +AW0_E 80 +AW0_I 81 +AW0_S 82 +AW1_B 83 +AW1_E 84 +AW1_I 85 +AW1_S 86 +AW2_B 87 +AW2_E 88 +AW2_I 89 +AW2_S 90 +AY_B 91 +AY_E 92 +AY_I 93 +AY_S 94 +AY0_B 95 +AY0_E 96 +AY0_I 97 +AY0_S 98 +AY1_B 99 +AY1_E 100 +AY1_I 101 +AY1_S 102 +AY2_B 103 +AY2_E 104 +AY2_I 105 +AY2_S 106 +B_B 107 +B_E 108 +B_I 109 +B_S 110 +CH_B 111 +CH_E 112 +CH_I 113 +CH_S 114 +D_B 115 +D_E 116 +D_I 117 +D_S 118 +DH_B 119 +DH_E 120 +DH_I 121 +DH_S 122 +EH_B 123 +EH_E 124 +EH_I 125 +EH_S 126 +EH0_B 127 +EH0_E 128 +EH0_I 129 +EH0_S 130 +EH1_B 131 +EH1_E 132 +EH1_I 133 +EH1_S 134 +EH2_B 135 +EH2_E 136 +EH2_I 137 +EH2_S 138 +ER_B 139 +ER_E 140 +ER_I 141 +ER_S 142 +ER0_B 143 +ER0_E 144 +ER0_I 145 +ER0_S 146 +ER1_B 147 +ER1_E 148 +ER1_I 149 +ER1_S 150 +ER2_B 151 +ER2_E 152 +ER2_I 153 +ER2_S 154 +EY_B 155 +EY_E 156 +EY_I 157 +EY_S 158 +EY0_B 159 +EY0_E 160 +EY0_I 161 +EY0_S 162 +EY1_B 163 +EY1_E 164 +EY1_I 165 +EY1_S 166 +EY2_B 167 +EY2_E 168 +EY2_I 169 +EY2_S 170 +F_B 171 +F_E 172 +F_I 173 +F_S 174 +G_B 175 +G_E 176 +G_I 177 +G_S 178 +HH_B 179 +HH_E 180 +HH_I 181 +HH_S 182 +IH_B 183 +IH_E 184 +IH_I 185 +IH_S 186 +IH0_B 187 +IH0_E 188 +IH0_I 189 +IH0_S 190 +IH1_B 191 +IH1_E 192 +IH1_I 193 +IH1_S 194 +IH2_B 195 +IH2_E 196 +IH2_I 197 +IH2_S 198 +IY_B 199 +IY_E 200 +IY_I 201 +IY_S 202 +IY0_B 203 +IY0_E 204 +IY0_I 205 +IY0_S 206 +IY1_B 207 +IY1_E 208 +IY1_I 209 +IY1_S 210 +IY2_B 211 +IY2_E 212 +IY2_I 213 +IY2_S 214 +JH_B 215 +JH_E 216 +JH_I 217 +JH_S 218 +K_B 219 +K_E 220 +K_I 221 +K_S 222 +L_B 223 +L_E 224 +L_I 225 +L_S 226 +M_B 227 +M_E 228 +M_I 229 +M_S 230 +N_B 231 +N_E 232 +N_I 233 +N_S 234 +NG_B 235 +NG_E 236 +NG_I 237 +NG_S 238 +OW_B 239 +OW_E 240 +OW_I 241 +OW_S 242 +OW0_B 243 +OW0_E 244 +OW0_I 245 +OW0_S 246 +OW1_B 247 +OW1_E 248 +OW1_I 249 +OW1_S 250 +OW2_B 251 +OW2_E 252 +OW2_I 253 +OW2_S 254 +OY_B 255 +OY_E 256 +OY_I 257 +OY_S 258 +OY0_B 259 +OY0_E 260 +OY0_I 261 +OY0_S 262 +OY1_B 263 +OY1_E 264 +OY1_I 265 +OY1_S 266 +OY2_B 267 +OY2_E 268 +OY2_I 269 +OY2_S 270 +P_B 271 +P_E 272 +P_I 273 +P_S 274 +R_B 275 +R_E 276 +R_I 277 +R_S 278 +S_B 279 +S_E 280 +S_I 281 +S_S 282 +SH_B 283 +SH_E 284 +SH_I 285 +SH_S 286 +T_B 287 +T_E 288 +T_I 289 +T_S 290 +TH_B 291 +TH_E 292 +TH_I 293 +TH_S 294 +UH_B 295 +UH_E 296 +UH_I 297 +UH_S 298 +UH0_B 299 +UH0_E 300 +UH0_I 301 +UH0_S 302 +UH1_B 303 +UH1_E 304 +UH1_I 305 +UH1_S 306 +UH2_B 307 +UH2_E 308 +UH2_I 309 +UH2_S 310 +UW_B 311 +UW_E 312 +UW_I 313 +UW_S 314 +UW0_B 315 +UW0_E 316 +UW0_I 317 +UW0_S 318 +UW1_B 319 +UW1_E 320 +UW1_I 321 +UW1_S 322 +UW2_B 323 +UW2_E 324 +UW2_I 325 +UW2_S 326 +V_B 327 +V_E 328 +V_I 329 +V_S 330 +W_B 331 +W_E 332 +W_I 333 +W_S 334 +Y_B 335 +Y_E 336 +Y_I 337 +Y_S 338 +Z_B 339 +Z_E 340 +Z_I 341 +Z_S 342 +ZH_B 343 +ZH_E 344 +ZH_I 345 +ZH_S 346 +#0 347 +#1 348 +#2 349 +#3 350 +#4 351 +#5 352 +#6 353 +#7 354 +#8 355 +#9 356 +#10 357 +#11 358 +#12 359 +#13 360 +#14 361 +#15 362 +#16 363 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/asr/dict.ltr.txt b/SpeechT5/SpeechLM/dataset/LibriSpeech/asr/dict.ltr.txt new file mode 100644 index 0000000000000000000000000000000000000000..26a7e6ba309998c3868db7ecab5d7afa52a68e52 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/asr/dict.ltr.txt @@ -0,0 +1,29 @@ +| 803288730 +E 439294199 +T 319071758 +A 277306732 +O 263784364 +N 239361162 +I 237353011 +H 223346762 +S 220175453 +R 203352500 +D 152198685 +L 
141597450 +U 98913389 +M 87138757 +C 84680142 +W 81375101 +F 80240665 +G 70642902 +Y 68388038 +P 58436929 +B 52538531 +V 33250231 +K 26906609 +' 9162896 +X 5075632 +J 4746771 +Q 3401794 +Z 2186971 + 1 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/asr/train_sample100.ltr b/SpeechT5/SpeechLM/dataset/LibriSpeech/asr/train_sample100.ltr new file mode 100644 index 0000000000000000000000000000000000000000..ab9ab39e823eba89897e7763155c77d6f2be38a4 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/asr/train_sample100.ltr @@ -0,0 +1,100 @@ +C H A P T E R | O N E | M I S S U S | R A C H E L | L Y N D E | I S | S U R P R I S E D | M I S S U S | R A C H E L | L Y N D E | L I V E D | J U S T | W H E R E | T H E | A V O N L E A | M A I N | R O A D | D I P P E D | D O W N | I N T O | A | L I T T L E | H O L L O W | F R I N G E D | W I T H | A L D E R S | A N D | L A D I E S | E A R D R O P S | A N D | T R A V E R S E D | B Y | A | B R O O K | +T H A T | H A D | I T S | S O U R C E | A W A Y | B A C K | I N | T H E | W O O D S | O F | T H E | O L D | C U T H B E R T | P L A C E | I T | W A S | R E P U T E D | T O | B E | A N | I N T R I C A T E | H E A D L O N G | B R O O K | I N | I T S | E A R L I E R | C O U R S E | T H R O U G H | T H O S E | W O O D S | W I T H | D A R K | S E C R E T S | O F | P O O L | A N D | C A S C A D E | B U T | B Y | T H E | T I M E | I T | R E A C H E D | L Y N D E ' S | H O L L O W | I T | W A S | A | Q U I E T | W E L L | C O N D U C T E D | L I T T L E | S T R E A M | +F O R | N O T | E V E N | A | B R O O K | C O U L D | R U N | P A S T | M I S S U S | R A C H E L | L Y N D E ' S | D O O R | W I T H O U T | D U E | R E G A R D | F O R | D E C E N C Y | A N D | D E C O R U M | I T | P R O B A B L Y | W A S | C O N S C I O U S | T H A T | M I S S U S | R A C H E L | W A S | S I T T I N G | A T | H E R | W I N D O W | K E E P I N G | A | S H A R P | E Y E | O N | E V E R Y T H I N G | T H A T | P A S S E D | F R O M | B R O O K S | A N D | C H I L D R E N | U P | +A N D | T H A T | I F | S H E | N O T I C E D | A N Y T H I N G | O D D | O R | O U T | O F | P L A C E | S H E | W O U L D | N E V E R | R E S T | U N T I L | S H E | H A D | F E R R E T E D | O U T | T H E | W H Y S | A N D | W H E R E F O R E S | T H E R E O F | T H E R E | A R E | P L E N T Y | O F | P E O P L E | I N | A V O N L E A | A N D | O U T | O F | I T | W H O | C A N | A T T E N D | C L O S E L Y | T O | T H E I R | N E I G H B O R ' S | B U S I N E S S | B Y | D I N T | O F | N E G L E C T I N G | T H E I R | O W N | +B U T | M I S S U S | R A C H E L | L Y N D E | W A S | O N E | O F | T H O S E | C A P A B L E | C R E A T U R E S | W H O | C A N | M A N A G E | T H E I R | O W N | C O N C E R N S | A N D | T H O S E | O F | O T H E R | F O L K S | I N T O | T H E | B A R G A I N | S H E | W A S | A | N O T A B L E | H O U S E W I F E | H E R | W O R K | W A S | A L W A Y S | D O N E | A N D | W E L L | D O N E | S H E | R A N | T H E | S E W I N G | C I R C L E | +H E L P E D | R U N | T H E | S U N D A Y | S C H O O L | A N D | W A S | T H E | S T R O N G E S T | P R O P | O F | T H E | C H U R C H | A I D | S O C I E T Y | A N D | F O R E I G N | M I S S I O N S | A U X I L I A R Y | Y E T | W I T H | A L L | T H I S | M I S S U S | R A C H E L | F O U N D | A B U N D A N T | T I M E | T O | S I T | F O R | H O U R S | A T | H E R | K I T C H E N | W I N D O W | K N I T T I N G | C O T T O N | W A R P | Q U I L T S | S H E | H A D | K N I T T E D | S I X T E E N | O F | T H E M | +A 
S | A V O N L E A | H O U S E K E E P E R S | W E R E | W O N T | T O | T E L L | I N | A W E D | V O I C E S | A N D | K E E P I N G | A | S H A R P | E Y E | O N | T H E | M A I N | R O A D | T H A T | C R O S S E D | T H E | H O L L O W | A N D | W O U N D | U P | T H E | S T E E P | R E D | H I L L | B E Y O N D | +A N Y B O D Y | W H O | W E N T | O U T | O F | I T | O R | I N T O | I T | H A D | T O | P A S S | O V E R | T H A T | H I L L | R O A D | A N D | S O | R U N | T H E | U N S E E N | G A U N T L E T | O F | M I S S U S | R A C H E L ' S | A L L | S E E I N G | E Y E | S H E | W A S | S I T T I N G | T H E R E | O N E | A F T E R N O O N | I N | E A R L Y | J U N E | T H E | S U N | W A S | C O M I N G | I N | A T | T H E | W I N D O W | W A R M | A N D | B R I G H T | +T H E | O R C H A R D | O N | T H E | S L O P E | B E L O W | T H E | H O U S E | W A S | I N | A | B R I D A L | F L U S H | O F | P I N K Y | W H I T E | B L O O M | H U M M E D | O V E R | B Y | A | M Y R I A D | O F | B E E S | T H O M A S | L Y N D E | A | M E E K | L I T T L E | M A N | W H O M | A V O N L E A | P E O P L E | C A L L E D | R A C H E L | L Y N D E ' S | H U S B A N D | W A S | S O W I N G | H I S | L A T E | T U R N I P | S E E D | O N | T H E | H I L L | F I E L D | B E Y O N D | T H E | B A R N | +M I S S U S | R A C H E L | K N E W | T H A T | H E | O U G H T | B E C A U S E | S H E | H A D | H E A R D | H I M | T E L L | P E T E R | M O R R I S O N | T H E | E V E N I N G | B E F O R E | I N | W I L L I A M | J | B L A I R ' S | S T O R E | O V E R | A T | C A R M O D Y | T H A T | H E | M E A N T | T O | S O W | H I S | T U R N I P | S E E D | T H E | N E X T | A F T E R N O O N | +P E T E R | H A D | A S K E D | H I M | O F | C O U R S E | F O R | M A T T H E W | C U T H B E R T | H A D | N E V E R | B E E N | K N O W N | T O | V O L U N T E E R | I N F O R M A T I O N | A B O U T | A N Y T H I N G | I N | H I S | W H O L E | L I F E | A N D | Y E T | H E R E | W A S | M A T T H E W | C U T H B E R T | A T | H A L F | P A S T | T H R E E | O N | T H E | A F T E R N O O N | O F | A | B U S Y | D A Y | P L A C I D L Y | D R I V I N G | O V E R | T H E | H O L L O W | A N D | U P | T H E | H I L L | +A N D | H I S | B E S T | S U I T | O F | C L O T H E S | W H I C H | W A S | P L A I N | P R O O F | T H A T | H E | W A S | G O I N G | O U T | O F | A V O N L E A | A N D | H E | H A D | T H E | B U G G Y | A N D | T H E | S O R R E L | M A R E | W H I C H | B E T O K E N E D | T H A T | H E | W A S | G O I N G | A | C O N S I D E R A B L E | D I S T A N C E | N O W | W H E R E | W A S | M A T T H E W | C U T H B E R T | G O I N G | A N D | W H Y | W A S | H E | G O I N G | T H E R E | +H A D | I T | B E E N | A N Y | O T H E R | M A N | I N | A V O N L E A | M I S S U S | R A C H E L | D E F T L Y | P U T T I N G | T H I S | A N D | T H A T | T O G E T H E R | M I G H T | H A V E | G I V E N | A | P R E T T Y | G O O D | G U E S S | A S | T O | B O T H | Q U E S T I O N S | B U T | M A T T H E W | S O | R A R E L Y | W E N T | F R O M | H O M E | T H A T | I T | M U S T | B E | S O M E T H I N G | P R E S S I N G | A N D | U N U S U A L | W H I C H | W A S | T A K I N G | H I M | +H E | W A S | T H E | S H Y E S T | M A N | A L I V E | A N D | H A T E D | T O | H A V E | T O | G O | A M O N G | S T R A N G E R S | O R | T O | A N Y | P L A C E | W H E R E | H E | M I G H T | H A V E | T O | T A L K | M A T T H E W | D R E S S E D | U P | W I T H | A | W H I T E | C O L L A R | A N D | 
D R I V I N G | I N | A | B U G G Y | W A S | S O M E T H I N G | T H A T | D I D N ' T | H A P P E N | O F T E N | M I S S U S | R A C H E L | P O N D E R | A S | S H E | M I G H T | C O U L D | M A K E | N O T H I N G | O F | I T | +A N D | H E R | A F T E R N O O N ' S | E N J O Y M E N T | W A S | S P O I L E D | I ' L L | J U S T | S T E P | O V E R | T O | G R E E N | G A B L E S | A F T E R | T E A | A N D | F I N D | O U T | F R O M | M A R I L L A | W H E R E | H E ' S | G O N E | A N D | W H Y | T H E | W O R T H Y | W O M A N | F I N A L L Y | C O N C L U D E D | H E | D O E S N ' T | G E N E R A L L Y | G O | T O | T O W N | T H I S | T I M E | O F | Y E A R | A N D | H E | N E V E R | V I S I T S | +I F | H E ' D | R U N | O U T | O F | T U R N I P | S E E D | H E | W O U L D N ' T | D R E S S | U P | A N D | T A K E | T H E | B U G G Y | T O | G O | F O R | M O R E | +Y E T | S O M E T H I N G | M U S T | H A V E | H A P P E N E D | S I N C E | L A S T | N I G H T | T O | S T A R T | H I M | O F F | I ' M | C L E A N | P U Z Z L E D | T H A T ' S | W H A T | A N D | I | W O N ' T | K N O W | A | M I N U T E ' S | P E A C E | O F | M I N D | O R | C O N S C I E N C E | U N T I L | I | K N O W | W H A T | H A S | T A K E N | M A T T H E W | C U T H B E R T | O U T | O F | A V O N L E A | T O D A Y | A C C O R D I N G L Y | A F T E R | T E A | M I S S U S | R A C H E L | S E T | O U T | S H E | H A D | N O T | F A R | T O | G O | +T H E | B I G | R A M B L I N G | O R C H A R D | E M B O W E R E D | H O U S E | W H E R E | T H E | C U T H B E R T S | L I V E D | W A S | A | S C A N T | Q U A R T E R | O F | A | M I L E | U P | T H E | R O A D | F R O M | L Y N D E ' S | H O L L O W | T O | B E | S U R E | T H E | L O N G | L A N E | M A D E | I T | A | G O O D | D E A L | F U R T H E R | M A T T H E W | C U T H B E R T ' S | F A T H E R | A S | S H Y | A N D | S I L E N T | A S | H I S | S O N | A F T E R | H I M | +H A D | G O T | A S | F A R | A W A Y | A S | H E | P O S S I B L Y | C O U L D | F R O M | H I S | F E L L O W | M E N | W I T H O U T | A C T U A L L Y | R E T R E A T I N G | I N T O | T H E | W O O D S | W H E N | H E | F O U N D E D | H I S | H O M E S T E A D | G R E E N | G A B L E S | W A S | B U I L T | A T | T H E | F U R T H E S T | E D G E | O F | H I S | C L E A R E D | L A N D | A N D | T H E R E | I T | W A S | T O | T H I S | D A Y | +B A R E L Y | V I S I B L E | F R O M | T H E | M A I N | R O A D | A L O N G | W H I C H | A L L | T H E | O T H E R | A V O N L E A | H O U S E S | W E R E | S O | S O C I A B L Y | S I T U A T E D | M I S S U S | R A C H E L | L Y N D E | D I D | N O T | C A L L | L I V I N G | I N | S U C H | A | P L A C E | L I V I N G | A T | A L L | I T ' S | J U S T | S T A Y I N G | T H A T ' S | W H A T | S H E | S A I D | A S | S H E | S T E P P E D | A L O N G | T H E | D E E P | R U T T E D | G R A S S Y | L A N E | +B O R D E R E D | W I T H | W I L D | R O S E | B U S H E S | I T ' S | N O | W O N D E R | M A T T H E W | A N D | M A R I L L A | A R E | B O T H | A | L I T T L E | O D D | L I V I N G | A W A Y | B A C K | H E R E | B Y | T H E M S E L V E S | T R E E S | A R E N ' T | M U C H | C O M P A N Y | T H O U G H | D E A R | K N O W S | I F | T H E Y | W E R E | T H E R E ' D | B E | E N O U G H | O F | T H E M | I ' D | R U T H E R | L O O K | A T | P E O P L E | T O | B E | S U R E | +T H E Y | S E E M | C O N T E N T E D | E N O U G H | B U T | T H E N | I | S U P P O S E | T H E Y ' R E | U S E D | T O | I T | 
A | B O D Y | C A N | G E T | U S E D | T O | A N Y T H I N G | E V E N | T O | B E I N G | H A N G E D | A S | T H E | I R I S H M A N | S A I D | W I T H | T H I S | M I S S U S | R A C H E L | S T E P P E D | O U T | O F | T H E | L A N E | I N T O | T H E | B A C K Y A R D | O F | G R E E N | G A B L E S | V E R Y | G R E E N | A N D | N E A T | A N D | P R E C I S E | W A S | T H A T | Y A R D | +S E T | A B O U T | O N | O N E | S I D E | W I T H | G R E A T | P A T R I A R C H A L | W I L L O W S | A N D | T H E | O T H E R | W I T H | P R I M | L O M B A R D I E S | N O T | A | S T R A Y | S T I C K | N O R | S T O N E | W A S | T O | B E | S E E N | F O R | M I S S U S | R A C H E L | W O U L D | H A V E | S E E N | I T | I F | T H E R E | H A D | B E E N | P R I V A T E L Y | S H E | W A S | O F | T H E | O P I N I O N | T H A T | M A R I L L A | C U T H B E R T | S W E P T | T H A T | Y A R D | O V E R | A S | O F T E N | A S | S H E | S W E P T | H E R | H O U S E | +O N E | C O U L D | H A V E | E A T E N | A | M E A L | O F F | T H E | G R O U N D | W I T H O U T | O V E R B R I M M I N G | T H E | P R O V E R B I A L | P E C K | O F | D I R T | M I S S U S | R A C H E L | R A P P E D | S M A R T L Y | A T | T H E | K I T C H E N | D O O R | A N D | S T E P P E D | I N | W H E N | B I D D E N | T O | D O | S O | T H E | K I T C H E N | A T | G R E E N | G A B L E S | W A S | A | C H E E R F U L | A P A R T M E N T | +O R | W O U L D | H A V E | B E E N | C H E E R F U L | I F | I T | H A D | N O T | B E E N | S O | P A I N F U L L Y | C L E A N | A S | T O | G I V E | I T | S O M E T H I N G | O F | T H E | A P P E A R A N C E | O F | A N | U N U S E D | P A R L O R | I T S | W I N D O W S | L O O K E D | E A S T | A N D | W E S T | T H R O U G H | T H E | W E S T | O N E | L O O K I N G | O U T | O N | T H E | B A C K | Y A R D | C A M E | A | F L O O D | O F | M E L L O W | J U N E | S U N L I G H T | B U T | T H E | E A S T | O N E | +W H E N C E | Y O U | G O T | A | G L I M P S E | O F | T H E | B L O O M | W H I T E | C H E R R Y | T R E E S | I N | T H E | L E F T | O R C H A R D | A N D | N O D D I N G | S L E N D E R | B I R C H E S | D O W N | I N | T H E | H O L L O W | B Y | T H E | B R O O K | W A S | G R E E N E D | O V E R | B Y | A | T A N G L E | O F | V I N E S | H E R E | S A T | M A R I L L A | C U T H B E R T | W H E N | S H E | S A T | A T | A L L | A L W A Y S | S L I G H T L Y | D I S T R U S T F U L | O F | S U N S H I N E | +A N D | H E R E | S H E | S A T | N O W | K N I T T I N G | A N D | T H E | T A B L E | B E H I N D | H E R | W A S | L A I D | F O R | S U P P E R | M I S S U S | R A C H E L | B E F O R E | S H E | H A D | F A I R L Y | C L O S E D | T H E | D O O R | +T H E R E | W E R E | T H R E E | P L A T E S | L A I D | S O | T H A T | M A R I L L A | M U S T | B E | E X P E C T I N G | S O M E | O N E | H O M E | W I T H | M A T T H E W | T O | T E A | B U T | T H E | D I S H E S | W E R E | E V E R Y D A Y | D I S H E S | A N D | T H E R E | W A S | O N L Y | C R A B | A P P L E | P R E S E R V E S | A N D | O N E | K I N D | O F | C A K E | S O | T H A T | T H E | E X P E C T E D | C O M P A N Y | C O U L D | N O T | B E | A N Y | P A R T I C U L A R | C O M P A N Y | +Y E T | W H A T | O F | M A T T H E W ' S | W H I T E | C O L L A R | A N D | T H E | S O R R E L | M A R E | M I S S U S | R A C H E L | W A S | G E T T I N G | F A I R L Y | D I Z Z Y | W I T H | T H I S | U N U S U A L | M Y S T E R Y | A B O U T | Q U I E T | U N M Y 
S T E R I O U S | G R E E N | G A B L E S | G O O D | E V E N I N G | R A C H E L | M A R I L L A | S A I D | B R I S K L Y | T H I S | I S | A | R E A L | F I N E | E V E N I N G | I S N ' T | I T | W O N ' T | Y O U | S I T | D O W N | +H O W | A R E | A L L | Y O U R | F O L K S | S O M E T H I N G | T H A T | F O R | L A C K | O F | A N Y | O T H E R | N A M E | M I G H T | B E | C A L L E D | F R I E N D S H I P | E X I S T E D | A N D | A L W A Y S | H A D | E X I S T E D | B E T W E E N | M A R I L L A | C U T H B E R T | A N D | M I S S U S | R A C H E L | I N | S P I T E | O F | O R | P E R H A P S | B E C A U S E | O F | T H E I R | D I S S I M I L A R I T Y | M A R I L L A | W A S | A | T A L L | +T H I N | W O M A N | W I T H | A N G L E S | A N D | W I T H O U T | C U R V E S | H E R | D A R K | H A I R | S H O W E D | S O M E | G R A Y | S T R E A K S | A N D | W A S | A L W A Y S | T W I S T E D | U P | I N | A | H A R D | L I T T L E | K N O T | B E H I N D | W I T H | T W O | W I R E | H A I R P I N S | S T U C K | A G G R E S S I V E L Y | T H R O U G H | I T | S H E | L O O K E D | L I K E | A | W O M A N | O F | N A R R O W | E X P E R I E N C E | A N D | R I G I D | C O N S C I E N C E | W H I C H | S H E | W A S | +B U T | T H E R E | W A S | A | S A V I N G | S O M E T H I N G | A B O U T | H E R | M O U T H | W H I C H | I F | I T | H A D | B E E N | E V E R | S O | S L I G H T L Y | D E V E L O P E D | M I G H T | H A V E | B E E N | C O N S I D E R E D | I N D I C A T I V E | O F | A | S E N S E | O F | H U M O R | W E ' R E | A L L | P R E T T Y | W E L L | S A I D | M I S S U S | R A C H E L | I | W A S | K I N D | O F | A F R A I D | Y O U | W E R E N ' T | T H O U G H | W H E N | I | S A W | M A T T H E W | S T A R T I N G | O F F | T O D A Y | I | T H O U G H T | M A Y B E | H E | W A S | G O I N G | T O | T H E | D O C T O R ' S | +M A R I L L A ' S | L I P S | T W I T C H E D | U N D E R S T A N D I N G L Y | S H E | H A D | E X P E C T E D | M I S S U S | R A C H E L | U P | S H E | H A D | K N O W N | T H A T | T H E | S I G H T | O F | M A T T H E W | J A U N T I N G | O F F | S O | U N A C C O U N T A B L Y | W O U L D | B E | T O O | M U C H | F O R | H E R | N E I G H B O R ' S | C U R I O S I T Y | O H | N O | I ' M | Q U I T E | W E L L | A L T H O U G H | I | H A D | A | B A D | H E A D A C H E | Y E S T E R D A Y | S H E | S A I D | +M A T T H E W | W E N T | T O | B R I G H T | R I V E R | W E ' R E | G E T T I N G | A | L I T T L E | B O Y | F R O M | A N | O R P H A N | A S Y L U M | I N | N O V A | S C O T I A | A N D | H E ' S | C O M I N G | O N | T H E | T R A I N | T O N I G H T | I F | M A R I L L A | H A D | S A I D | T H A T | M A T T H E W | H A D | G O N E | T O | B R I G H T | R I V E R | T O | M E E T | A | K A N G A R O O | F R O M | A U S T R A L I A | M I S S U S | R A C H E L | C O U L D | N O T | H A V E | B E E N | M O R E | A S T O N I S H E D | +S H E | W A S | A C T U A L L Y | S T R I C K E N | D U M B | F O R | F I V E | S E C O N D S | I T | W A S | U N S U P P O S A B L E | T H A T | M A R I L L A | W A S | M A K I N G | F U N | O F | H E R | B U T | M I S S U S | R A C H E L | W A S | A L M O S T | F O R C E D | T O | S U P P O S E | I T | A R E | Y O U | I N | E A R N E S T | M A R I L L A | S H E | D E M A N D E D | W H E N | V O I C E | R E T U R N E D | T O | H E R | Y E S | O F | C O U R S E | +S A I D | M A R I L L A | A S | I F | G E T T I N G | B O Y S | F R O M | O R P H A N | A S Y L U M S | I N | N O V A | S 
C O T I A | W E R E | P A R T | O F | T H E | U S U A L | S P R I N G | W O R K | O N | A N Y | W E L L | R E G U L A T E D | A V O N L E A | F A R M | I N S T E A D | O F | B E I N G | A N | U N H E A R D | O F | I N N O V A T I O N | M I S S U S | R A C H E L | F E L T | T H A T | S H E | H A D | R E C E I V E D | A | S E V E R E | M E N T A L | J O L T | S H E | T H O U G H T | I N | E X C L A M A T I O N | P O I N T S | +M A R I L L A | A N D | M A T T H E W | C U T H B E R T | O F | A L L | P E O P L E | A D O P T I N G | A | B O Y | F R O M | A N | O R P H A N | A S Y L U M | W E L L | T H E | W O R L D | W A S | C E R T A I N L Y | T U R N I N G | U P S I D E | D O W N | S H E | W O U L D | B E | S U R P R I S E D | A T | N O T H I N G | A F T E R | T H I S | N O T H I N G | +W H A T | O N | E A R T H | P U T | S U C H | A | N O T I O N | I N T O | Y O U R | H E A D | S H E | D E M A N D E D | D I S A P P R O V I N G L Y | T H I S | H A D | B E E N | D O N E | W I T H O U T | H E R | A D V I C E | B E I N G | A S K E D | A N D | M U S T | P E R F O R C E | B E | D I S A P P R O V E D | W E L L | W E ' V E | B E E N | T H I N K I N G | A B O U T | I T | F O R | S O M E | T I M E | A L L | W I N T E R | I N | F A C T | R E T U R N E D | M A R I L L A | +M I S S U S | A L E X A N D E R | S P E N C E R | W A S | U P | H E R E | O N E | D A Y | B E F O R E | C H R I S T M A S | A N D | S H E | S A I D | S H E | W A S | G O I N G | T O | G E T | A | L I T T L E | G I R L | F R O M | T H E | A S Y L U M | O V E R | I N | H O P E T O N | I N | T H E | S P R I N G | +S O | M A T T H E W | A N D | I | H A V E | T A L K E D | I T | O V E R | O F F | A N D | O N | E V E R | S I N C E | W E | T H O U G H T | W E ' D | G E T | A | B O Y | M A T T H E W | I S | G E T T I N G | U P | I N | Y E A R S | Y O U | K N O W | H E ' S | S I X T Y | A N D | H E | I S N ' T | S O | S P R Y | A S | H E | O N C E | W A S | H I S | H E A R T | T R O U B L E S | H I M | A | G O O D | D E A L | A N D | Y O U | K N O W | H O W | D E S P E R A T E | H A R D | I T ' S | G O T | T O | B E | T O | G E T | H I R E D | H E L P | +T H E R E ' S | N E V E R | A N Y B O D Y | T O | B E | H A D | B U T | T H O S E | S T U P I D | H A L F | G R O W N | L I T T L E | F R E N C H | B O Y S | A N D | A S | S O O N | A S | Y O U | D O | G E T | O N E | B R O K E | I N T O | Y O U R | W A Y S | A N D | T A U G H T | S O M E T H I N G | H E ' S | U P | A N D | O F F | T O | T H E | L O B S T E R | C A N N E R I E S | O R | T H E | S T A T E S | A T | F I R S T | M A T T H E W | S U G G E S T E D | G E T T I N G | A | H O M E | B O Y | B U T | I | S A I D | N O | F L A T | T O | T H A T | +T H E Y | M A Y | B E | A L L | R I G H T | I ' M | N O T | S A Y I N G | T H E Y ' R E | N O T | B U T | N O | L O N D O N | S T R E E T | A R A B S | F O R | M E | I | S A I D | G I V E | M E | A | N A T I V E | B O R N | A T | L E A S T | T H E R E ' L L | B E | A | R I S K | N O | M A T T E R | W H O | W E | G E T | B U T | I ' L L | F E E L | E A S I E R | I N | M Y | M I N D | A N D | S L E E P | S O U N D E R | A T | N I G H T S | I F | W E | G E T | A | B O R N | C A N A D I A N | +S O | I N | T H E | E N D | W E | D E C I D E D | T O | A S K | M I S S U S | S P E N C E R | T O | P I C K | U S | O U T | O N E | W H E N | S H E | W E N T | O V E R | T O | G E T | H E R | L I T T L E | G I R L | W E | H E A R D | L A S T | W E E K | S H E | W A S | G O I N G | S O | W E | S E N T | H E R | W O R D | B Y | R I C H A R D | S P E N C E R ' S | 
F O L K S | A T | C A R M O D Y | T O | B R I N G | U S | A | S M A R T | L I K E L Y | B O Y | O F | A B O U T | T E N | O R | E L E V E N | W E | D E C I D E D | T H A T | W O U L D | B E | T H E | B E S T | A G E | +O L D | E N O U G H | T O | B E | O F | S O M E | U S E | I N | D O I N G | C H O R E S | R I G H T | O F F | A N D | Y O U N G | E N O U G H | T O | B E | T R A I N E D | U P | P R O P E R | W E | M E A N | T O | G I V E | H I M | A | G O O D | H O M E | A N D | S C H O O L I N G | W E | H A D | A | T E L E G R A M | F R O M | M I S S U S | A L E X A N D E R | S P E N C E R | T O D A Y | T H E | M A I L | M A N | B R O U G H T | I T | F R O M | T H E | S T A T I O N | S A Y I N G | T H E Y | W E R E | C O M I N G | O N | T H E | F I V E | T H I R T Y | T R A I N | T O N I G H T | +S O | M A T T H E W | W E N T | T O | B R I G H T | R I V E R | T O | M E E T | H I M | M I S S U S | S P E N C E R | W I L L | D R O P | H I M | O F F | T H E R E | O F | C O U R S E | S H E | G O E S | O N | T O | W H I T E | S A N D S | S T A T I O N | H E R S E L F | M I S S U S | R A C H E L | P R I D E D | H E R S E L F | O N | A L W A Y S | S P E A K I N G | H E R | M I N D | S H E | P R O C E E D E D | T O | S P E A K | I T | N O W | H A V I N G | A D J U S T E D | H E R | M E N T A L | A T T I T U D E | T O | T H I S | A M A Z I N G | P I E C E | O F | N E W S | +W E L L | M A R I L L A | I ' L L | J U S T | T E L L | Y O U | P L A I N | T H A T | I | T H I N K | Y O U ' R E | D O I N G | A | M I G H T Y | F O O L I S H | T H I N G | A | R I S K Y | T H I N G | T H A T ' S | W H A T | Y O U | D O N ' T | K N O W | W H A T | Y O U ' R E | G E T T I N G | Y O U ' R E | B R I N G I N G | A | S T R A N G E | C H I L D | I N T O | Y O U R | H O U S E | A N D | H O M E | A N D | Y O U | D O N ' T | K N O W | A | S I N G L E | T H I N G | A B O U T | H I M | N O R | W H A T | H I S | D I S P O S I T I O N | I S | L I K E | N O R | W H A T | S O R T | O F | P A R E N T S | H E | H A D | +N O R | H O W | H E ' S | L I K E L Y | T O | T U R N | O U T | W H Y | I T | W A S | O N L Y | L A S T | W E E K | I | R E A D | I N | T H E | P A P E R | H O W | A | M A N | A N D | H I S | W I F E | U P | W E S T | O F | T H E | I S L A N D | T O O K | A | B O Y | O U T | O F | A N | O R P H A N | A S Y L U M | A N D | H E | S E T | F I R E | T O | T H E | H O U S E | A T | N I G H T | S E T | I T | O N | P U R P O S E | M A R I L L A | A N D | N E A R L Y | B U R N T | T H E M | T O | A | C R I S P | I N | T H E I R | B E D S | +A N D | I | K N O W | A N O T H E R | C A S E | W H E R E | A N | A D O P T E D | B O Y | U S E D | T O | S U C K | T H E | E G G S | T H E Y | C O U L D N ' T | B R E A K | H I M | O F | I T | I F | Y O U | H A D | A S K E D | M Y | A D V I C E | I N | T H E | M A T T E R | W H I C H | Y O U | D I D N ' T | D O | M A R I L L A | I ' D | H A V E | S A I D | F O R | M E R C Y ' S | S A K E | N O T | T O | T H I N K | O F | S U C H | A | T H I N G | T H A T ' S | W H A T | +T H I S | J O B ' S | C O M F O R T I N G | S E E M E D | N E I T H E R | T O | O F F E N D | N O R | T O | A L A R M | M A R I L L A | S H E | K N I T T E D | S T E A D I L Y | O N | I | D O N ' T | D E N Y | T H E R E ' S | S O M E T H I N G | I N | W H A T | Y O U | S A Y | R A C H E L | I ' V E | H A D | S O M E | Q U A L M S | M Y S E L F | B U T | M A T T H E W | W A S | T E R R I B L E | S E T | O N | I T | I | C O U L D | S E E | T H A T | S O | I | G A V E | I N | +I T ' S | S O | S E L D O M | M A T T H E W 
| S E T S | H I S | M I N D | O N | A N Y T H I N G | T H A T | W H E N | H E | D O E S | I | A L W A Y S | F E E L | I T ' S | M Y | D U T Y | T O | G I V E | I N | A N D | A S | F O R | T H E | R I S K | T H E R E ' S | R I S K S | I N | P R E T T Y | N E A R | E V E R Y T H I N G | A | B O D Y | D O E S | I N | T H I S | W O R L D | T H E R E ' S | R I S K S | I N | P E O P L E ' S | H A V I N G | C H I L D R E N | O F | T H E I R | O W N | I F | I T | C O M E S | T O | T H A T | T H E Y | D O N ' T | A L W A Y S | T U R N | O U T | W E L L | +A N D | T H E N | N O V A | S C O T I A | I S | R I G H T | C L O S E | T O | T H E | I S L A N D | I T | I S N ' T | A S | I F | W E | W E R E | G E T T I N G | H I M | F R O M | E N G L A N D | O R | T H E | S T A T E S | H E | C A N ' T | B E | M U C H | D I F F E R E N T | F R O M | O U R S E L V E S | W E L L | I | H O P E | I T | W I L L | T U R N | O U T | A L L | R I G H T | S A I D | M I S S U S | R A C H E L | I N | A | T O N E | T H A T | P L A I N L Y | I N D I C A T E D | H E R | P A I N F U L | D O U B T S | +O N L Y | D O N ' T | S A Y | I | D I D N ' T | W A R N | Y O U | I F | H E | B U R N S | G R E E N | G A B L E S | D O W N | O R | P U T S | S T R Y C H N I N E | I N | T H E | W E L L | I | H E A R D | O F | A | C A S E | O V E R | I N | N E W | B R U N S W I C K | W H E R E | A N | O R P H A N | A S Y L U M | C H I L D | D I D | T H A T | A N D | T H E | W H O L E | F A M I L Y | D I E D | I N | F E A R F U L | A G O N I E S | O N L Y | I T | W A S | A | G I R L | I N | T H A T | I N S T A N C E | W E L L | W E ' R E | N O T | G E T T I N G | A | G I R L | S A I D | M A R I L L A | +A S | I F | P O I S O N I N G | W E L L S | W E R E | A | P U R E L Y | F E M I N I N E | A C C O M P L I S H M E N T | A N D | N O T | T O | B E | D R E A D E D | I N | T H E | C A S E | O F | A | B O Y | I ' D | N E V E R | D R E A M | O F | T A K I N G | A | G I R L | T O | B R I N G | U P | I | W O N D E R | A T | M I S S U S | A L E X A N D E R | S P E N C E R | F O R | D O I N G | I T | B U T | T H E R E | S H E | W O U L D N ' T | S H R I N K | F R O M | A D O P T I N G | A | W H O L E | O R P H A N | A S Y L U M | I F | S H E | T O O K | I T | I N T O | H E R | H E A D | +M I S S U S | R A C H E L | W O U L D | H A V E | L I K E D | T O | S T A Y | U N T I L | M A T T H E W | C A M E | H O M E | W I T H | H I S | I M P O R T E D | O R P H A N | B U T | R E F L E C T I N G | T H A T | I T | W O U L D | B E | A | G O O D | T W O | H O U R S | A T | L E A S T | B E F O R E | H I S | A R R I V A L | S H E | C O N C L U D E D | T O | G O | U P | T H E | R O A D | T O | R O B E R T | B E L L ' S | A N D | T E L L | T H E | N E W S | I T | W O U L D | C E R T A I N L Y | M A K E | A | S E N S A T I O N | S E C O N D | T O | N O N E | +A N D | M I S S U S | R A C H E L | D E A R L Y | L O V E D | T O | M A K E | A | S E N S A T I O N | S O | S H E | T O O K | H E R S E L F | A W A Y | S O M E W H A T | T O | M A R I L L A ' S | R E L I E F | F O R | T H E | L A T T E R | F E L T | H E R | D O U B T S | A N D | F E A R S | R E V I V I N G | U N D E R | T H E | I N F L U E N C E | O F | M I S S U S | R A C H E L ' S | P E S S I M I S M | W E L L | O F | A L L | T H I N G S | T H A T | E V E R | W E R E | O R | W I L L | B E | E J A C U L A T E D | M I S S U S | R A C H E L | W H E N | S H E | W A S | S A F E L Y | O U T | I N | T H E | L A N E | +I T | D O E S | R E A L L Y | S E E M | A S | I F | I | M U S T | B E | D R E A M I N G | W E L L | I ' 
M | S O R R Y | F O R | T H A T | P O O R | Y O U N G | O N E | A N D | N O | M I S T A K E | M A T T H E W | A N D | M A R I L L A | D O N ' T | K N O W | A N Y T H I N G | A B O U T | C H I L D R E N | A N D | T H E Y ' L L | E X P E C T | H I M | T O | B E | W I S E R | A N D | S T E A D I E R | T H A T | H I S | O W N | G R A N D F A T H E R | +I T | S E E M S | U N C A N N Y | T O | T H I N K | O F | A | C H I L D | A T | G R E E N | G A B L E S | S O M E H O W | T H E R E ' S | N E V E R | B E E N | O N E | T H E R E | F O R | M A T T H E W | A N D | M A R I L L A | W E R E | G R O W N | U P | W H E N | T H E | N E W | H O U S E | W A S | B U I L T | I F | T H E Y | E V E R | W E R E | C H I L D R E N | W H I C H | I S | H A R D | T O | B E L I E V E | W H E N | O N E | L O O K S | A T | T H E M | I | W O U L D N ' T | B E | I N | T H A T | O R P H A N ' S | S H O E S | F O R | A N Y T H I N G | +M Y | B U T | I | P I T Y | H I M | T H A T ' S | W H A T | S O | S A I D | M I S S U S | R A C H E L | T O | T H E | W I L D | R O S E | B U S H E S | O U T | O F | T H E | F U L N E S S | O F | H E R | H E A R T | +C H A P T E R | T W O | M A T T H E W | C U T H B E R T | I S | S U R P R I S E D | M A T T H E W | C U T H B E R T | A N D | T H E | S O R R E L | M A R E | J O G G E D | C O M F O R T A B L Y | O V E R | T H E | E I G H T | M I L E S | T O | B R I G H T | R I V E R | I T | W A S | A | P R E T T Y | R O A D | R U N N I N G | A L O N G | B E T W E E N | S N U G | F A R M S T E A D S | W I T H | N O W | A N D | A G A I N | A | B I T | O F | B A L S A M Y | F I R | W O O D | T O | D R I V E | T H R O U G H | +O R | A | H O L L O W | W H E R E | W I L D | P L U M S | H U N G | O U T | T H E I R | F I L M Y | B L O O M | T H E | A I R | W A S | S W E E T | W I T H | T H E | B R E A T H | O F | M A N Y | A P P L E | O R C H A R D S | A N D | T H E | M E A D O W S | S L O P E D | A W A Y | I N | T H E | D I S T A N C E | T O | H O R I Z O N | M I S T S | O F | P E A R L | A N D | P U R P L E | W H I L E | T H E | L I T T L E | B I R D S | S A N G | A S | I F | I T | W E R E | T H E | O N E | D A Y | O F | S U M M E R | I N | A L L | T H E | Y E A R | +M A T T H E W | E N J O Y E D | T H E | D R I V E | A F T E R | H I S | O W N | F A S H I O N | E X C E P T | D U R I N G | T H E | M O M E N T S | W H E N | H E | M E T | W O M E N | A N D | H A D | T O | N O D | T O | T H E M | F O R | I N | P R I N C E | E D W A R D | I S L A N D | Y O U | A R E | S U P P O S E D | T O | N O D | T O | A L L | A N D | S U N D R Y | Y O U | M E E T | O N | T H E | R O A D | W H E T H E R | Y O U | K N O W | T H E M | O R | N O T | M A T T H E W | D R E A D E D | A L L | W O M E N | E X C E P T | M A R I L L A | A N D | M I S S U S | R A C H E L | +H E | H A D | A N | U N C O M F O R T A B L E | F E E L I N G | T H A T | T H E | M Y S T E R I O U S | C R E A T U R E S | W E R E | S E C R E T L Y | L A U G H I N G | A T | H I M | H E | M A Y | H A V E | B E E N | Q U I T E | R I G H T | I N | T H I N K I N G | S O | F O R | H E | W A S | A N | O D D | L O O K I N G | P E R S O N A G E | W I T H | A N | U N G A I N L Y | F I G U R E | A N D | L O N G | I R O N | G R A Y | H A I R | T H A T | T O U C H E D | H I S | S T O O P I N G | S H O U L D E R S | +A N D | A | F U L L | S O F T | B R O W N | B E A R D | W H I C H | H E | H A D | W O R N | E V E R | S I N C E | H E | W A S | T W E N T Y | I N | F A C T | H E | H A D | L O O K E D | A T | T W E N T Y | V E R Y | M U C H | A S | H E | L O O K E D | A T | 
S I X T Y | L A C K I N G | A | L I T T L E | O F | T H E | G R A Y N E S S | W H E N | H E | R E A C H E D | B R I G H T | R I V E R | T H E R E | W A S | N O | S I G N | O F | A N Y | T R A I N | +H E | T H O U G H T | H E | W A S | T O O | E A R L Y | S O | H E | T I E D | H I S | H O R S E | I N | T H E | Y A R D | O F | T H E | S M A L L | B R I G H T | R I V E R | H O T E L | A N D | W E N T | O V E R | T O | T H E | S T A T I O N | H O U S E | T H E | L O N G | P L A T F O R M | W A S | A L M O S T | D E S E R T E D | T H E | O N L Y | L I V I N G | C R E A T U R E | I N | S I G H T | B E I N G | A | G I R L | W H O | W A S | S I T T I N G | O N | A | P I L E | O F | S H I N G L E S | A T | T H E | E X T R E M E | E N D | +M A T T H E W | B A R E L Y | N O T I N G | T H A T | I T | W A S | A | G I R L | S I D L E D | P A S T | H E R | A S | Q U I C K L Y | A S | P O S S I B L E | W I T H O U T | L O O K I N G | A T | H E R | H A D | H E | L O O K E D | H E | C O U L D | H A R D L Y | H A V E | F A I L E D | T O | N O T I C E | T H E | T E N S E | R I G I D I T Y | A N D | E X P E C T A T I O N | O F | H E R | A T T I T U D E | A N D | E X P R E S S I O N | S H E | W A S | S I T T I N G | T H E R E | W A I T I N G | F O R | S O M E T H I N G | O R | S O M E B O D Y | +A N D | S I N C E | S I T T I N G | A N D | W A I T I N G | W A S | T H E | O N L Y | T H I N G | T O | D O | J U S T | T H E N | S H E | S A T | A N D | W A I T E D | W I T H | A L L | H E R | M I G H T | A N D | M A I N | M A T T H E W | E N C O U N T E R E D | T H E | S T A T I O N M A S T E R | L O C K I N G | U P | T H E | T I C K E T | O F F I C E | P R E P A R A T O R Y | T O | G O I N G | H O M E | F O R | S U P P E R | A N D | A S K E D | H I M | I F | T H E | F I V E | T H I R T Y | T R A I N | W O U L D | S O O N | B E | A L O N G | +T H E | F I V E | T H I R T Y | T R A I N | H A S | B E E N | I N | A N D | G O N E | H A L F | A N | H O U R | A G O | A N S W E R E D | T H A T | B R I S K | O F F I C I A L | B U T | T H E R E | W A S | A | P A S S E N G E R | D R O P P E D | O F F | F O R | Y O U | A | L I T T L E | G I R L | S H E ' S | S I T T I N G | O U T | T H E R E | O N | T H E | S H I N G L E S | I | A S K E D | H E R | T O | G O | I N T O | T H E | L A D I E S | W A I T I N G | R O O M | B U T | S H E | I N F O R M E D | M E | G R A V E L Y | T H A T | S H E | P R E F E R R E D | T O | S T A Y | O U T S I D E | +S H E ' S | A | C A S E | I | S H O U L D | S A Y | I ' M | N O T | E X P E C T I N G | A | G I R L | S A I D | M A T T H E W | B L A N K L Y | I T ' S | A | B O Y | I ' V E | C O M E | F O R | H E | S H O U L D | B E | H E R E | M I S S U S | A L E X A N D E R | S P E N C E R | W A S | T O | B R I N G | H I M | O V E R | F R O M | N O V A | S C O T I A | F O R | M E | T H E | S T A T I O N M A S T E R | W H I S T L E D | +G U E S S | T H E R E ' S | S O M E | M I S T A K E | H E | S A I D | M I S S U S | S P E N C E R | C A M E | O F F | T H E | T R A I N | W I T H | T H A T | G I R L | A N D | G A V E | H E R | I N T O | M Y | C H A R G E | S A I D | Y O U | A N D | Y O U R | S I S T E R | W E R E | A D O P T I N G | H E R | F R O M | A N | O R P H A N | A S Y L U M | A N D | T H A T | Y O U | W O U L D | B E | A L O N G | F O R | H E R | P R E S E N T L Y | T H A T ' S | A L L | I | K N O W | A B O U T | I T | A N D | I | H A V E N ' T | G O T | A N Y | M O R E | O R P H A N S | C O N C E A L E D | H E R E A B O U T S | +I | D O N ' T | U N D E R S T A N D | S A I D | M A T T H E W | H E L 
P L E S S L Y | W I S H I N G | T H A T | M A R I L L A | W A S | A T | H A N D | T O | C O P E | W I T H | T H E | S I T U A T I O N | W E L L | Y O U ' D | B E T T E R | Q U E S T I O N | T H E | G I R L | S A I D | T H E | S T A T I O N | M A S T E R | C A R E L E S S L Y | I | D A R E | S A Y | S H E ' L L | B E | A B L E | T O | E X P L A I N | S H E ' S | G O T | A | T O N G U E | O F | H E R | O W N | T H A T ' S | C E R T A I N | +M A Y B E | T H E Y | W E R E | O U T | O F | B O Y S | O F | T H E | B R A N D | Y O U | W A N T E D | H E | W A L K E D | J A U N T I L Y | A W A Y | B E I N G | H U N G R Y | A N D | T H E | U N F O R T U N A T E | M A T T H E W | W A S | L E F T | T O | D O | T H A T | W H I C H | W A S | H A R D E R | F O R | H I M | T H A N | B E A R D I N G | A | L I O N | I N | I T S | D E N | W A L K | U P | T O | A | G I R L | A | S T R A N G E | G I R L | A N | O R P H A N | G I R L | +A N D | D E M A N D | O F | H E R | W H Y | S H E | W A S N ' T | A | B O Y | M A T T H E W | G R O A N E D | I N | S P I R I T | A S | H E | T U R N E D | A B O U T | A N D | S H U F F L E D | G E N T L Y | D O W N | T H E | P L A T F O R M | T O W A R D S | H E R | S H E | H A D | B E E N | W A T C H I N G | H I M | E V E R | S I N C E | H E | H A D | P A S S E D | H E R | A N D | S H E | H A D | H E R | E Y E S | O N | H I M | N O W | M A T T H E W | W A S | N O T | L O O K I N G | A T | H E R | +A | C H I L D | O F | A B O U T | E L E V E N | G A R B E D | I N | A | V E R Y | S H O R T | V E R Y | T I G H T | V E R Y | U G L Y | D R E S S | O F | Y E L L O W I S H | G R A Y | W I N C E Y | S H E | W O R E | A | F A D E D | B R O W N | S A I L O R | H A T | A N D | B E N E A T H | T H E | H A T | E X T E N D I N G | D O W N | H E R | B A C K | W E R E | T W O | B R A I D S | O F | V E R Y | T H I C K | D E C I D E D L Y | R E D | H A I R | +H E R | F A C E | W A S | S M A L L | W H I T E | A N D | T H I N | A L S O | M U C H | F R E C K L E D | H E R | M O U T H | W A S | L A R G E | A N D | S O | W E R E | H E R | E Y E S | W H I C H | L O O K E D | G R E E N | I N | S O M E | L I G H T S | A N D | M O O D S | A N D | G R A Y | I N | O T H E R S | S O | F A R | T H E | O R D I N A R Y | O B S E R V E R | A N | E X T R A O R D I N A R Y | O B S E R V E R | +M I G H T | H A V E | S E E N | T H A T | T H E | C H I N | W A S | V E R Y | P O I N T E D | A N D | P R O N O U N C E D | T H A T | T H E | B I G | E Y E S | W E R E | F U L L | O F | S P I R I T | A N D | V I V A C I T Y | T H A T | T H E | M O U T H | W A S | S W E E T | L I P P E D | A N D | E X P R E S S I V E | T H A T | T H E | F O R E H E A D | W A S | B R O A D | A N D | F U L L | I N | S H O R T | O U R | D I S C E R N I N G | E X T R A O R D I N A R Y | O B S E R V E R | M I G H T | H A V E | C O N C L U D E D | +W A S | S O | L U D I C R O U S L Y | A F R A I D | M A T T H E W | H O W E V E R | W A S | S P A R E D | T H E | O R D E A L | O F | S P E A K I N G | F I R S T | F O R | A S | S O O N | A S | S H E | C O N C L U D E D | T H A T | H E | W A S | C O M I N G | T O | H E R | S H E | S T O O D | U P | G R A S P I N G | W I T H | O N E | T H I N | B R O W N | H A N D | T H E | H A N D L E | O F | A | S H A B B Y | O L D | F A S H I O N E D | C A R P E T | B A G | T H E | O T H E R | S H E | H E L D | O U T | T O | H I M | +I | S U P P O S E | Y O U | A R E | M I S T E R | M A T T H E W | C U T H B E R T | O F | G R E E N | G A B L E S | S H E | S A I D | I N | A | P E C U L I A R L Y | C L E A R | S W E E T 
| V O I C E | I ' M | V E R Y | G L A D | T O | S E E | Y O U | I | W A S | B E G I N N I N G | T O | B E | A F R A I D | Y O U | W E R E N ' T | C O M I N G | F O R | M E | +I | H A D | M A D E | U P | M Y | M I N D | T H A T | I F | Y O U | D I D N ' T | C O M E | F O R | M E | T O | N I G H T | +I | W O U L D N ' T | B E | A | B I T | A F R A I D | A N D | I T | W O U L D | B E | L O V E L Y | T O | S L E E P | I N | A | W I L D | C H E R R Y | T R E E | A L L | W H I T E | W I T H | B L O O M | I N | T H E | M O O N S H I N E | D O N ' T | Y O U | T H I N K | Y O U | C O U L D | I M A G I N E | Y O U | W E R E | D W E L L I N G | I N | M A R B L E | H A L L S | C O U L D N ' T | Y O U | +M A T T H E W | H A D | T A K E N | T H E | S C R A W N Y | L I T T L E | H A N D | A W K W A R D L Y | I N | H I S | T H E N | A N D | T H E R E | H E | D E C I D E D | W H A T | T O | D O | H E | C O U L D | N O T | T E L L | T H I S | C H I L D | W I T H | T H E | G L O W I N G | E Y E S | T H A T | T H E R E | H A D | B E E N | A | M I S T A K E | H E | W O U L D | T A K E | H E R | H O M E | A N D | L E T | M A R I L L A | D O | T H A T | S H E | C O U L D N ' T | B E | L E F T | A T | B R I G H T | R I V E R | A N Y H O W | +N O | M A T T E R | W H A T | M I S T A K E | H A D | B E E N | M A D E | S O | A L L | Q U E S T I O N S | A N D | E X P L A N A T I O N S | M I G H T | A S | W E L L | B E | D E F E R R E D | U N T I L | H E | W A S | S A F E L Y | B A C K | A T | G R E E N | G A B L E S | I ' M | S O R R Y | I | W A S | L A T E | H E | S A I D | S H Y L Y | C O M E | A L O N G | T H E | H O R S E | I S | O V E R | I N | T H E | Y A R D | G I V E | M E | Y O U R | B A G | O H | I | C A N | C A R R Y | I T | T H E | C H I L D | R E S P O N D E D | C H E E R F U L L Y | +I T | I S N ' T | H E A V Y | I ' V E | G O T | A L L | M Y | W O R L D L Y | G O O D S | I N | I T | B U T | I T | I S N ' T | H E A V Y | A N D | I F | I T | I S N ' T | C A R R I E D | I N | J U S T | A | C E R T A I N | W A Y | T H E | H A N D L E | P U L L S | O U T | S O | I ' D | B E T T E R | K E E P | I T | B E C A U S E | I | K N O W | T H E | E X A C T | K N A C K | O F | I T | I T ' S | A N | E X T R E M E L Y | O L D | C A R P E T | B A G | O H | I ' M | V E R Y | G L A D | Y O U ' V E | C O M E | E V E N | I F | I T | W O U L D | H A V E | B E E N | N I C E | T O | S L E E P | I N | A | W I L D | C H E R R Y | T R E E | +W E ' V E | G O T | T O | D R I V E | A | L O N G | P I E C E | H A V E N ' T | W E | M I S S U S | S P E N C E R | S A I D | I T | W A S | E I G H T | M I L E S | I ' M | G L A D | B E C A U S E | I | L O V E | D R I V I N G | O H | I T | S E E M S | S O | W O N D E R F U L | T H A T | I ' M | G O I N G | T O | L I V E | W I T H | Y O U | A N D | B E L O N G | T O | Y O U | I ' V E | N E V E R | B E L O N G E D | T O | A N Y B O D Y | N O T | R E A L L Y | B U T | T H E | A S Y L U M | W A S | T H E | W O R S T | I ' V E | O N L Y | B E E N | I N | I T | F O U R | M O N T H S | B U T | T H A T | W A S | E N O U G H | +I T ' S | W O R S E | T H A N | A N Y T H I N G | Y O U | C O U L D | I M A G I N E | M I S S U S | S P E N C E R | S A I D | I T | W A S | W I C K E D | O F | M E | T O | T A L K | L I K E | T H A T | +T H E Y | W E R E | G O O D | Y O U | K N O W | T H E | A S Y L U M | P E O P L E | B U T | T H E R E | I S | S O | L I T T L E | S C O P E | F O R | T H E | I M A G I N A T I O N | I N | A N | A S Y L U M | O N L Y | J U S T | I N | T H E | O T H E R | O R P H A N S | I T | W A S 
| P R E T T Y | I N T E R E S T I N G | T O | I M A G I N E | T H I N G S | A B O U T | T H E M | +W H O | H A D | B E E N | S T O L E N | A W A Y | F R O M | H E R | P A R E N T S | I N | H E R | I N F A N C Y | B Y | A | C R U E L | N U R S E | W H O | D I E D | B E F O R E | S H E | C O U L D | C O N F E S S | I | U S E D | T O | L I E | A W A K E | A T | N I G H T S | A N D | I M A G I N E | T H I N G S | L I K E | T H A T | B E C A U S E | I | D I D N ' T | H A V E | T I M E | I N | T H E | D A Y | I | G U E S S | T H A T ' S | W H Y | I ' M | S O | T H I N | I | A M | D R E A D F U L | T H I N | A I N ' T | I | T H E R E | I S N ' T | A | P I C K | O N | M Y | B O N E S | +I | D O | L O V E | T O | I M A G I N E | I ' M | N I C E | A N D | P L U M P | W I T H | D I M P L E S | I N | M Y | E L B O W S | W I T H | T H I S | M A T T H E W ' S | C O M P A N I O N | S T O P P E D | T A L K I N G | P A R T L Y | B E C A U S E | S H E | W A S | O U T | O F | B R E A T H | A N D | P A R T L Y | B E C A U S E | T H E Y | H A D | R E A C H E D | T H E | B U G G Y | N O T | A N O T H E R | W O R D | D I D | S H E | S A Y | U N T I L | T H E Y | H A D | L E F T | T H E | V I L L A G E | A N D | W E R E | D R I V I N G | D O W N | A | S T E E P | L I T T L E | H I L L | +T H E | R O A D | P A R T | O F | W H I C H | H A D | B E E N | C U T | S O | D E E P L Y | I N T O | T H E | S O F T | S O I L | T H A T | T H E | B A N K S | F R I N G E D | W I T H | B L O O M I N G | W I L D | C H E R R Y | T R E E S | A N D | S L I M | W H I T E | B I R C H E S | W E R E | S E V E R A L | F E E T | A B O V E | T H E I R | H E A D S | T H E | C H I L D | P U T | O U T | H E R | H A N D | A N D | B R O K E | O F F | A | B R A N C H | O F | W I L D | P L U M | T H A T | B R U S H E D | A G A I N S T | T H E | S I D E | O F | T H E | B U G G Y | +I S N ' T | T H A T | B E A U T I F U L | W H A T | D I D | T H A T | T R E E | L E A N I N G | O U T | F R O M | T H E | B A N K | A L L | W H I T E | A N D | L A C Y | M A K E | Y O U | T H I N K | O F | S H E | A S K E D | W E L L | N O W | I | D U N N O | S A I D | M A T T H E W | W H Y | A | B R I D E | O F | C O U R S E | A | B R I D E | A L L | I N | W H I T E | W I T H | A | L O V E L Y | M I S T Y | V E I L | +I ' V E | N E V E R | S E E N | O N E | B U T | I | C A N | I M A G I N E | W H A T | S H E | W O U L D | L O O K | L I K E | I | D O N ' T | E V E R | E X P E C T | T O | B E | A | B R I D E | M Y S E L F | I ' M | S O | H O M E L Y | N O B O D Y | W I L L | E V E R | W A N T | T O | M A R R Y | M E | U N L E S S | I T | M I G H T | B E | A | F O R E I G N | M I S S I O N A R Y | I | S U P P O S E | A | F O R E I G N | M I S S I O N A R Y | M I G H T N ' T | B E | V E R Y | P A R T I C U L A R | +B U T | I | D O | H O P E | T H A T | S O M E | D A Y | I | S H A L L | H A V E | A | W H I T E | D R E S S | T H A T | I S | M Y | H I G H E S T | I D E A L | O F | E A R T H L Y | B L I S S | I | J U S T | L O V E | P R E T T Y | C L O T H E S | A N D | I ' V E | N E V E R | H A D | A | P R E T T Y | D R E S S | I N | M Y | L I F E | T H A T | I | C A N | R E M E M B E R | B U T | O F | C O U R S E | I T ' S | A L L | T H E | M O R E | T O | L O O K | F O R W A R D | T O | I S N ' T | I T | A N D | T H E N | +I | C A N | I M A G I N E | T H A T | I ' M | D R E S S E D | G O R G E O U S L Y | T H I S | M O R N I N G | W H E N | I | L E F T | T H E | A S Y L U M | I | F E L T | S O | A S H A M E D | B E C A U S E | I | H A D | T O | W E A R | T H I S | H O R R I 
D | O L D | W I N C E Y | D R E S S | A L L | T H E | O R P H A N S | H A D | T O | W E A R | T H E M | Y O U | K N O W | A | M E R C H A N T | I N | H O P E T O N | L A S T | W I N T E R | D O N A T E D | T H R E E | H U N D R E D | Y A R D S | O F | W I N C E Y | T O | T H E | A S Y L U M | S O M E | P E O P L E | S A I D | I T | W A S | B E C A U S E | H E | C O U L D N ' T | S E L L | I T | +B U T | I ' D | R A T H E R | B E L I E V E | T H A T | I T | W A S | O U T | O F | T H E | K I N D N E S S | O F | H I S | H E A R T | W O U L D N ' T | Y O U | W H E N | W E | G O T | O N | T H E | T R A I N | I | F E L T | A S | I F | E V E R Y B O D Y | M U S T | B E | L O O K I N G | A T | M E | A N D | P I T Y I N G | M E | B U T | I | J U S T | W E N T | T O | W O R K | A N D | I M A G I N E D | T H A T | I | H A D | O N | T H E | M O S T | B E A U T I F U L | P A L E | B L U E | S I L K | D R E S S | B E C A U S E | W H E N | Y O U | A R E | I M A G I N I N G | Y O U | M I G H T | A S | W E L L | I M A G I N E | S O M E T H I N G | W O R T H | W H I L E | +A N D | A | B I G | H A T | A L L | F L O W E R S | A N D | N O D D I N G | P L U M E S | A N D | A | G O L D | W A T C H | A N D | K I D | G L O V E S | A N D | B O O T S | I | F E L T | C H E E R E D | U P | R I G H T | A W A Y | A N D | I | E N J O Y E D | M Y | T R I P | T O | T H E | I S L A N D | W I T H | A L L | M Y | M I G H T | I | W A S N ' T | A | B I T | S I C K | C O M I N G | O V E R | I N | T H E | B O A T | N E I T H E R | W A S | M I S S U S | S P E N C E R | A L T H O U G H | S H E | G E N E R A L L Y | I S | +S H E | S A I D | S H E | H A D N ' T | T I M E | T O | G E T | S I C K | W A T C H I N G | T O | S E E | T H A T | I | D I D N ' T | F A L L | O V E R B O A R D | S H E | S A I D | S H E | N E V E R | S A W | T H E | B E A T | O F | M E | F O R | P R O W L I N G | A B O U T | B U T | I F | I T | K E P T | H E R | F R O M | B E I N G | S E A S I C K | I T ' S | A | M E R C Y | I | D I D | P R O W L | I S N ' T | I T | A N D | I | W A N T E D | T O | S E E | E V E R Y T H I N G | T H A T | W A S | T O | B E | S E E N | O N | T H A T | B O A T | B E C A U S E | I | D I D N ' T | K N O W | W H E T H E R | I ' D | E V E R | H A V E | A N O T H E R | O P P O R T U N I T Y | +O H | T H E R E | A R E | A | L O T | M O R E | C H E R R Y | T R E E S | A L L | I N | B L O O M | T H I S | I S L A N D | I S | T H E | B L O O M I E S T | P L A C E | I | J U S T | L O V E | I T | A L R E A D Y | A N D | I ' M | S O | G L A D | I ' M | G O I N G | T O | L I V E | H E R E | I ' V E | A L W A Y S | H E A R D | T H A T | P R I N C E | E D W A R D | I S L A N D | W A S | T H E | P R E T T I E S T | P L A C E | I N | T H E | W O R L D | +A N D | I | U S E D | T O | I M A G I N E | I | W A S | L I V I N G | H E R E | B U T | I | N E V E R | R E A L L Y | E X P E C T E D | I | W O U L D | I T ' S | D E L I G H T F U L | W H E N | Y O U R | I M A G I N A T I O N S | C O M E | T R U E | I S N ' T | I T | B U T | T H O S E | R E D | R O A D S | A R E | S O | F U N N Y | W H E N | W E | G O T | I N T O | T H E | T R A I N | A T | C H A R L O T T E T O W N | A N D | T H E | R E D | R O A D S | B E G A N | T O | F L A S H | P A S T | I | A S K E D | M I S S U S | S P E N C E R | W H A T | M A D E | T H E M | R E D | +A N D | S H E | S A I D | S H E | D I D N ' T | K N O W | A N D | F O R | P I T Y ' S | S A K E | N O T | T O | A S K | H E R | A N Y | M O R E | Q U E S T I O N S | S H E | S A I D | I | M U S T | H A V E | A S K E D | H E R | 
A | T H O U S A N D | A L R E A D Y | I | S U P P O S E | I | H A D | T O O | B U T | H O W | Y O U | G O I N G | T O | F I N D | O U T | A B O U T | T H I N G S | I F | Y O U | D O N ' T | A S K | Q U E S T I O N S | A N D | W H A T | D O E S | M A K E | T H E | R O A D S | R E D | W E L L | N O W | I | D U N N O | S A I D | M A T T H E W | +T H E R E ' D | B E | N O | S C O P E | F O R | I M A G I N A T I O N | T H E N | W O U L D | T H E R E | B U T | A M | I | T A L K I N G | T O O | M U C H | P E O P L E | A R E | A L W A Y S | T E L L I N G | M E | I | D O | W O U L D | Y O U | R A T H E R | I | D I D N ' T | T A L K | I F | Y O U | S A Y | S O | I ' L L | S T O P | I | C A N | S T O P | W H E N | I | M A K E | U P | M Y | M I N D | T O | I T | A L T H O U G H | I T ' S | D I F F I C U L T | M A T T H E W | +W A S | E N J O Y I N G | H I M S E L F | L I K E | M O S T | Q U I E T | F O L K S | H E | L I K E D | T A L K A T I V E | P E O P L E | W H E N | T H E Y | W E R E | W I L L I N G | T O | D O | T H E | T A L K I N G | T H E M S E L V E S | A N D | D I D | N O T | E X P E C T | H I M | T O | K E E P | U P | H I S | E N D | O F | I T | B U T | H E | H A D | N E V E R | E X P E C T E D | T O | E N J O Y | T H E | S O C I E T Y | O F | A | L I T T L E | G I R L | W O M E N | W E R E | B A D | E N O U G H | I N | A L L | C O N S C I E N C E | B U T | L I T T L E | G I R L S | W E R E | W O R S E | diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/asr/train_sample100.tsv b/SpeechT5/SpeechLM/dataset/LibriSpeech/asr/train_sample100.tsv new file mode 100644 index 0000000000000000000000000000000000000000..8b50a1d2f1e06553881ec3352bee2e6360814635 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/asr/train_sample100.tsv @@ -0,0 +1,101 @@ +/LocalData/dataset/LibriSpeech/train-clean-100 +103/1240/103-1240-0000.flac 225360 +103/1240/103-1240-0001.flac 255120 +103/1240/103-1240-0002.flac 223120 +103/1240/103-1240-0003.flac 235360 +103/1240/103-1240-0004.flac 200240 +103/1240/103-1240-0005.flac 242800 +103/1240/103-1240-0006.flac 153280 +103/1240/103-1240-0007.flac 240560 +103/1240/103-1240-0008.flac 246960 +103/1240/103-1240-0009.flac 160480 +103/1240/103-1240-0010.flac 236880 +103/1240/103-1240-0011.flac 234480 +103/1240/103-1240-0012.flac 243040 +103/1240/103-1240-0013.flac 244160 +103/1240/103-1240-0014.flac 223360 +103/1240/103-1240-0015.flac 60960 +103/1240/103-1240-0016.flac 250640 +103/1240/103-1240-0017.flac 229040 +103/1240/103-1240-0018.flac 185760 +103/1240/103-1240-0019.flac 246480 +103/1240/103-1240-0020.flac 214640 +103/1240/103-1240-0021.flac 236960 +103/1240/103-1240-0022.flac 262000 +103/1240/103-1240-0023.flac 194400 +103/1240/103-1240-0024.flac 244320 +103/1240/103-1240-0025.flac 241920 +103/1240/103-1240-0026.flac 133360 +103/1240/103-1240-0027.flac 223440 +103/1240/103-1240-0028.flac 250400 +103/1240/103-1240-0029.flac 244320 +103/1240/103-1240-0030.flac 232320 +103/1240/103-1240-0031.flac 269760 +103/1240/103-1240-0032.flac 236400 +103/1240/103-1240-0033.flac 230640 +103/1240/103-1240-0034.flac 246480 +103/1240/103-1240-0035.flac 256720 +103/1240/103-1240-0036.flac 200320 +103/1240/103-1240-0037.flac 237040 +103/1240/103-1240-0038.flac 114480 +103/1240/103-1240-0039.flac 230800 +103/1240/103-1240-0040.flac 234720 +103/1240/103-1240-0041.flac 216160 +103/1240/103-1240-0042.flac 249680 +103/1240/103-1240-0043.flac 236160 +103/1240/103-1240-0044.flac 262240 +103/1240/103-1240-0045.flac 250800 +103/1240/103-1240-0046.flac 222800 +103/1240/103-1240-0047.flac 206320 
+103/1240/103-1240-0048.flac 236320 +103/1240/103-1240-0049.flac 244560 +103/1240/103-1240-0050.flac 224400 +103/1240/103-1240-0051.flac 245760 +103/1240/103-1240-0052.flac 236640 +103/1240/103-1240-0053.flac 218640 +103/1240/103-1240-0054.flac 261360 +103/1240/103-1240-0055.flac 179920 +103/1240/103-1240-0056.flac 229040 +103/1240/103-1240-0057.flac 109680 +103/1241/103-1241-0000.flac 255440 +103/1241/103-1241-0001.flac 248800 +103/1241/103-1241-0002.flac 249040 +103/1241/103-1241-0003.flac 222160 +103/1241/103-1241-0004.flac 236080 +103/1241/103-1241-0005.flac 224400 +103/1241/103-1241-0006.flac 243760 +103/1241/103-1241-0007.flac 242320 +103/1241/103-1241-0008.flac 242160 +103/1241/103-1241-0009.flac 222400 +103/1241/103-1241-0010.flac 253920 +103/1241/103-1241-0011.flac 231760 +103/1241/103-1241-0012.flac 239680 +103/1241/103-1241-0013.flac 236960 +103/1241/103-1241-0014.flac 242080 +103/1241/103-1241-0015.flac 224160 +103/1241/103-1241-0016.flac 234640 +103/1241/103-1241-0017.flac 254240 +103/1241/103-1241-0018.flac 150960 +103/1241/103-1241-0019.flac 48400 +103/1241/103-1241-0020.flac 155360 +103/1241/103-1241-0021.flac 242880 +103/1241/103-1241-0022.flac 261600 +103/1241/103-1241-0023.flac 266720 +103/1241/103-1241-0024.flac 254240 +103/1241/103-1241-0025.flac 77280 +103/1241/103-1241-0026.flac 176080 +103/1241/103-1241-0027.flac 238080 +103/1241/103-1241-0028.flac 248880 +103/1241/103-1241-0029.flac 244960 +103/1241/103-1241-0030.flac 247520 +103/1241/103-1241-0031.flac 209600 +103/1241/103-1241-0032.flac 224080 +103/1241/103-1241-0033.flac 251920 +103/1241/103-1241-0034.flac 270560 +103/1241/103-1241-0035.flac 248800 +103/1241/103-1241-0036.flac 249040 +103/1241/103-1241-0037.flac 204400 +103/1241/103-1241-0038.flac 238960 +103/1241/103-1241-0039.flac 258160 +103/1241/103-1241-0040.flac 220560 +103/1241/103-1241-0041.flac 252240 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/config.yaml b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eaec2ce8655ebfa043cf73a2ee2d85ac5bcdfb21 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/config.yaml @@ -0,0 +1,13 @@ +audio_root: /home/v-ziqzhang/dataset/librispeech_phone2unit +features: + energy_max: 5.733445167541504 + energy_min: 1.0e-08 + eps: 1.0e-05 + hop_length: 256 + pitch_max: 6.608609099713706 + pitch_min: 1.0e-08 + sample_rate: 16000 +sample_rate: 16000 +vocab_filename: dict.km.txt +src_vocab_filename: dict.phn.txt + diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/config_generate.yaml b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/config_generate.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d9fa74529728fe81f41edd55689f43f6ae2da83 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/config_generate.yaml @@ -0,0 +1,13 @@ +audio_root: /home/v-ziqzhang/dataset/librispeech_phone2unit +features: + energy_max: 5.733445167541504 + energy_min: 1.0e-08 + eps: 1.0e-05 + hop_length: 256 + pitch_max: 6.608609099713706 + pitch_min: 1.0e-08 + sample_rate: 16000 +sample_rate: 16000 +vocab_filename: dict.km.txt +src_vocab_filename: dict.PHN.txt + diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/dict.PHN.txt b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/dict.PHN.txt new file mode 100644 index 0000000000000000000000000000000000000000..60232ecf55c10e9ab673168262af28951ecbec2f --- /dev/null +++ 
b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/dict.PHN.txt @@ -0,0 +1,42 @@ +| 0 + 1 +' 2 +AA 3 +AE 4 +AH 5 +AO 6 +AW 7 +AY 8 +B 9 +CH 10 +D 11 +DH 12 +EH 13 +ER 14 +EY 15 +F 16 +G 17 +HH 18 +IH 19 +IY 20 +JH 21 +K 22 +L 23 +M 24 +N 25 +NG 26 +OW 27 +OY 28 +P 29 +R 30 +S 31 +SH 32 +T 33 +TH 34 +UH 35 +UW 36 +V 37 +W 38 +Y 39 +Z 40 +ZH 41 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/dict.km.txt b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/dict.km.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/dict.km.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 
+340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 409 +410 410 +411 411 +412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 +447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 +487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/genset_examples.tsv b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/genset_examples.tsv new file mode 100644 index 0000000000000000000000000000000000000000..fe4a9a1b21a77835afaacc936f59963e8ed0090c --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/genset_examples.tsv @@ -0,0 +1,101 @@ +id speaker n_frames tgt_text unit +librilm-9899 librilm 323 AH B EH T R OW TH AH L AH W EH D IH NG AO R AH K R IH S AH N IH NG AO R DH AH M IH R P R AA K S IH M AH T IY AH V AH G IH T AA R IH Z S AH F IH SH AH N T AH K EY ZH AH N AH N D IH F DH AH AH K EY ZH AH N L AE K S S EH N D F AO R DH AH G IH T AA R AH N D D AE N S EH N IY W EY 0 +librilm-9900 librilm 449 AH B EH T R OW TH AH L B R OW K AH N AO F W AA Z AH TH IH NG DH AE T HH AE D HH AE P AH N D B IH F AO R AH N D M AY T HH AE P AH N AH G EH N IH T W AA Z AH T R AY F AH L IY V IH N AH M IH R N AH TH IH NG IH F OW N L IY HH AH N ER W ER AH N T AH CH T B AY IH T IH F OW N L IY AA T AH M AA R K UH D S T EY K HH IH Z L AY F AH P AA N HH IH Z AH N IH M P IY CH AH B AH L HH AH N ER 0 +librilm-9901 librilm 211 AH B EH T R OW TH AH L S EH R AH M OW N IY AH K AO R D IH NG L IY IH Z DH AH IH M IY D IY AH T AH K EY ZH AH N AH V DH AH K AH M IH NG T AH G EH DH ER AH V AW ER AH K W EY N T AH N S IH Z 0 +librilm-9902 librilm 45 AH B EH T R OW TH AH L D EY 0 +librilm-9903 librilm 141 AH B EH T R OW TH AH L HH IY R IH Z AO L M OW S T AE Z B AY N D IH NG AH N D K W AY T AE Z S AA L AH M AE Z AH M EH R IH JH 0 +librilm-9904 librilm 59 AH B EH T R OW TH AH L IH Z S EY K R AH D 0 +librilm-9905 librilm 79 AH B EH T R OW TH AH L IH Z S AH M TH IH NG HH EH V AH N L IY 0 +librilm-9906 librilm 225 AH B EH T R OW TH AH L R IH NG W AA Z P ER CH AH S T AH N D DH EH N HH ER K AA N SH AH N S B IY IH NG AH P IY Z D SH IY G EY V HH ER S EH L F K AH M P L IY T L IY T UW HH ER L AH V ER 0 +librilm-9907 librilm 288 AH B EH T R OW TH AH L T UH K P L EY S AO L W AA Z HH AA R M AH N IY AH N D F AO R AH T AY M N OW M AO R W AA Z S EH D AH V D IH S IH N ER SH IH T IH NG M AE D AH M 
D IY L AA P EH L T R IY AO R P AH T IH NG HH ER IH N W AO R D SH IH P 0 +librilm-9908 librilm 139 AH B EH T R OW TH AH L W IY HH AE V B IH N T UW AH B AO L AH V W IH CH AY M AH S T G IH V Y UW AH D IH S K R IH P SH AH N 0 +librilm-9909 librilm 491 AH B EH T R OW TH AH L W IH CH HH AE D T EY K AH N P L EY S AA N DH AH P R IY V IY AH S IY V N IH NG G EY V K AA Z F AO R P L EH N T AH F AH L SH R AH G IH NG AH V SH OW L D ER Z B IH K AO Z DH AH JH EH N T AH L M AH N AE Z Y EH T HH EH L D N OW R IH S P EH K T AH B AH L P AH Z IH SH AH N IH N L AY F AH N D DH AH F IY AE N S IY AE Z S EH V R AH L F IY M EY L F R EH N D Z AH S ER T AH D AH V EH R IY AH N S ER T AH N W AH N 0 +librilm-9910 librilm 269 AH B EH T R OW TH AH L W IH TH AW T AH N D IY V IH N AH G EH N S T DH AH K AH N S EH N T AH V P EH R AH N T S W AA Z S AH M TH IH NG K W AY T AW T S AY D AH V DH AH Y AH NG L EY D IY Z P AW ER AH V K AA M P R IY HH EH N SH AH N 0 +librilm-9911 librilm 29 AH B EH T R AH TH 0 +librilm-9912 librilm 127 AH B EH T R AH TH B R AY D AO T T UW B IY HH AE P IY Y UW AA R AO L W EY Z T EH L IH NG M IY S OW 0 +librilm-9913 librilm 108 AH B EH T R AH TH G ER L IH Z W AH N TH IH NG AH W AY F K W AY T AH N AH DH ER 0 +librilm-9914 librilm 168 AH B EH T R AH TH L AH V ER K AE N AA T T AA L ER EY T EH N IY AH S P ER ZH AH N K AE S T AH P AA N DH AH F EH R S EH K S S EH D JH AO R JH AH Z 0 +librilm-9915 librilm 61 AH B EH T R AH TH L AH V ER Z F EH R W EH L 0 +librilm-9916 librilm 335 AH B EH T R AH TH Y AH NG M AE N AO R HH IH Z F IY M EY L R EH L AH T IH V Z AH S IH S T IH NG HH IH M W AA Z AH K AH S T AH M D T UW M EY K AH P R EH Z AH N T AH V W AH N AO R M AO R P EH T IY K OW T S T UW HH IH Z S W IY T HH AA R T T UW IH N K R IY S HH ER W AO R D R OW B 0 +librilm-9917 librilm 24 AH B EH T ER 0 +librilm-9918 librilm 182 AH B EH T ER AH F AY N ER AH N OW B L ER F EH L OW DH AE N HH IY N EH V ER L AY V D AH N D DH AE T S W AH T AY W AA N T Y UW F EH L OW Z T UW N OW 0 +librilm-9919 librilm 254 AH B EH T ER AH K AW N T AH V AH M IH L AH T EH R IY AE K SH AH N DH AE N DH AE T W IH CH S M OW L IH T G IH V Z AH V DH AH B AE T AH L AH V DH AH B OY N IH T W UH D B IY HH AA R D T UW F AY N D 0 +librilm-9920 librilm 117 AH B EH T ER AH K AW N T AH V DH AH SH UH G ER DH AE N AY K UH D HH AE V IH K S P EH K T AH D 0 +librilm-9921 librilm 135 AH B EH T ER AH K AW N T AH V DH IH S IH N S AH D AH N T W AA Z W AY D L IY P R IH N T AH D AE T DH AE T T AY M 0 +librilm-9922 librilm 251 AH B EH T ER AH K AW N T T UW DH AE N DH AH N UW Z P EY P ER W AH N F UH L ER IH G Z AE K T ER M AO R D IH T EY L D B AE K T AH P B AY F IH G Y ER Z D AW N TH R IY L AO NG SH IY T S AH N D HH AE F W EY D AW N AH F AO R TH 0 +librilm-9923 librilm 84 AH B EH T ER AH K W EY N T AH N S W IH L S UW N D IH S K AH V ER DH IH S 0 +librilm-9924 librilm 258 AH B EH T ER AH K W EY N T AH N S W IH DH DH AH AA P ER EY SH AH N N OW N T UW M AA D ER N Z AE Z S P AY K IH NG AH P IY S W UH D HH AE V EH N EY B AH L D HH IH M T UW M EY K DH AH B L OW IH R EH P ER AH B AH L 0 +librilm-9925 librilm 595 AH B EH T ER AH K W EY N T AH N S W IH DH DH IH S L AE N D AH V F R IY D AH M W IH L SH OW Y UW DH AE T F AH D EH L AH T IY AH N D HH AH N ER B IH T W IY N HH AH Z B AH N D AH N D W AY F AA R HH IY R N OW R EH R IH K S EH P SH AH N Z B AH T DH AH Y UW N AH V ER S AH L R UW L B AH T Y UW M AH S T N OW AE T W AH N S DH AE T W IY D UW N AA T DH EH R F AO R EH K S ER S AY Z EH N IY S UW P ER HH Y UW M AH N V ER CH UW B AH T S IH M P L IY AE K T IH N K AH N F AO R M AH T IY W IH DH 
DH AH R IY L N EY CH ER AH V M AE N 0 +librilm-9926 librilm 176 AH B EH T ER AE D M AY R ER DH AE N M IY SH IY W IH L N AA T F AY N D IH N HH ER T AW N K AW N S AH L N AO R IH N HH AY ER S AH S AY AH T IY 0 +librilm-9927 librilm 143 AH B EH T ER AH D V ER T AH Z M AH N T DH AE N DH AH K AO R N EH T S OW L OW K UH D N AA T HH AE V B IH N D IH V AY Z D 0 +librilm-9928 librilm 213 AH B EH T ER AO L R AW N D M AE N DH AE N S IY S AH L AY SH UH D HH OW P S EH D D EY V IH D L IH N T AH N W IH DH AH S AW N D L AY K AH S N AO R T AH V R AE TH 0 +librilm-9929 librilm 140 AH B EH T ER AO L T ER K UH D N AA T HH AE V B IH N S AH L EH K T AH D IH N AO L DH AE T V AE S T R IY JH AH N 0 +librilm-9930 librilm 423 AH B EH T ER AE N K AY N D ER W UH M AH N DH AE N M IH S IH Z HH Y UW IH T Y UW W UH D AH N T F AY N D N AA T IH F Y UW W AA Z T UW W IH DH D IH F AH K AH L T IY DH AH S T R EY N JH ER AH B T EY N D AH F Y UW D IH T EY L Z AH V DH AH AO R AH JH AH N AH N D K AO R S AH V DH AH IH L N AH S D IH T EY L Z HH OW L IY M IH S L IY D IH NG B AH T D IH V AY Z D T UW R IY AH SH UH R 0 +librilm-9931 librilm 211 AH B EH T ER AE NG K ER AH JH DH AE N DH IH S P AA R T AH V DH AH K OW S T AH F AO R D AH D HH AE V IH NG B IH N F AW N D DH AH SH IH P B R AO T AH P HH IY R 0 +librilm-9932 librilm 147 AH B EH T ER AH N D B OW L D ER K AA R D P L EY ER DH AE N L AO R D B EH L IH NG ER N EH V ER HH EH L D AH T R AH M P 0 +librilm-9933 librilm 164 AH B EH T ER AH N D M AO R K R IH S CH AH N M AE N S K EH R S L IY EH V ER B R IY DH D DH AE N JH OW S AH F AE D AH S AH N 0 +librilm-9934 librilm 98 AH B EH T ER AH N D M AO R K AH N S IH S T AH N T W UH M AH N N EH V ER L AY V D 0 +librilm-9935 librilm 442 AH B EH T ER AH N D M AO R AA N ER AH B AH L AO F ER IH NG IH Z M EY D T UW AW ER M AE S T ER IH N M IH N AH S T R IY T UW DH AH P UH R IH N IH K S T EH N D IH NG DH AH N AA L AH JH AH V HH IH Z N EY M IH N DH AH P R AE K T AH S AH V DH AH V ER CH UW Z B AY W IH CH DH AE T N EY M IH Z HH AE L OW D DH AE N IH N M AH T IH R IY AH L P R EH Z AH N T S T UW HH IH Z T EH M P AH L 0 +librilm-9936 librilm 555 AH B EH T ER AH N D M AO R S P IY D IY P L AE N W UH D P ER HH AE P S HH AE V B IH N T UW S IY K AW T W AH N AH V Z UW M AH L AA K S W AA R OW Z EY D Z D IY K AE M P R IH L EY T T UW HH IH M HH IH Z R IY S AH N T AE D V EH N CH ER Z P R OW D UW S R IY T AH Z L EH T ER IH N K ER AO B ER EY SH AH N AH V HH IH Z V ER AE S IH T IY AH N D R IH K W EH S T HH IH M T UW F AO R W ER D IH T AO R P R AH V AY D HH IH M W IH DH AH HH AO R S T UW T EY K IH T HH IH M S EH L F 0 +librilm-9937 librilm 142 AH B EH T ER AH N D W AY Z ER P R IH N S SH AE L ER AY Z HH UW SH AE L R IH S T AO R P R AA S P EH R AH T IY T UW JH UW D AH 0 +librilm-9938 librilm 453 AH B EH T ER AE N S ER IH Z DH AE T DH AH K AO R T HH AE Z B IH F AO R HH AE N D S T R AO NG P R IY Z AH M P T IH V EH V AH D AH N S AH V DH AH K R AY M AH N D DH AE T AH P R IH Z AH N ER IH Z N AA T P UH T T UW DH AH T AO R CH ER AH N T IH L IH T HH AE Z B IH N W EH L AE S ER T EY N D B AY T EH S T AH M OW N IY AH B T EY N D EH L S W EH R DH AE T HH IY IH Z AH G R EY T AH F EH N D ER 0 +librilm-9939 librilm 151 AH B EH T ER AE N T IH D OW T T UW DH AH S T OW N W IH DH IH N P EH R IH S IH Z T UW B IY F AW N D IH N DH AH S T OW N ER AW N D IH T 0 +librilm-9940 librilm 139 AH B EH T ER AH P AA L AH JH IY L AY Z IH N DH AH T EH K S T AH P R EH SH AH N M AE K IH TH AH W AY Z M AE N M AE D 0 +librilm-9941 librilm 355 AH B EH T ER AH P AA L AH JH IY M EY B IY F AW N D IH N DH AH IH M AH T EY T 
IH NG DH AH K AH N F EH SH AH N AH V AA N AH S T B EH N AH D IH K T DH AE T W EH N HH IY S EH D HH IY W UH D D AY AH B AE CH AH L ER HH IY D IH D N AA T TH IH NG K HH IY SH UH D L AY V T UW B IY M EH R IY D 0 +librilm-9942 librilm 303 AH B EH T ER AH P OY N T AH D AA R M IY K AH N S IH S T IH NG AH V DH AH V EH R IY F L AW ER AH V SH IH V AH L R IY AH V Y UH R AH P HH AE D IH N DH AH M IY N T AY M AH S EH M B AH L D T UW F AA L OW DH AH S EY M P AE TH DH OW IH N AH D IH F ER AH N T M AE N ER 0 +librilm-9943 librilm 163 AH B EH T ER AA R G Y AH M AH N T IH N F EY V ER AH V HH AA R T F ER D IH Z DH AE T TH R IY R EY L R OW D Z S EH N T ER DH EH R 0 +librilm-9944 librilm 106 AH B EH T ER AA R M ER ER N EH V ER L EY D HH AE M ER AA N AE N V AH L 0 +librilm-9945 librilm 316 AH B EH T ER AA R M IY M AE N F AO R M AE N P R AA B AH B L IY N EH V ER F EY S T AE N EH N AH M IY DH AE N DH AH W AH N K AH M AE N D AH D B AY JH EH N ER AH L T EY L ER IH N DH AH ER L IY AH S T T UW EH N G EY JH M AH N T S AH V DH AH M EH K S AH K AH N W AO R 0 +librilm-9946 librilm 144 AH B EH T ER AA R T DH AE N DH AE T AH V IY JH AH P T HH AE Z T EY K AH N F IH R AH N D K ER AH P SH AH N AW T AH V IH T 0 +librilm-9947 librilm 116 AH B EH T ER AH S AO R T IH D K AH P AH L Y UW K UH D F AY N D N OW W EH R 0 +librilm-9948 librilm 235 AH B EH T ER AH T ER N IY F AO R DH AH P ER P AH S AH Z T UW W IH CH HH IH Z L AY F W AA Z D IH V OW T AH D D IH D N AA T IH G Z IH S T IH N L AH N D AH N DH AE N M IH S T ER K AE M P D ER AW N 0 +librilm-9949 librilm 133 AH B EH T ER B AA R G AH N W AA Z D R IH V AH N IH N DH AH W IH R IY S K W EH R DH AE N EH N IY W EH R EH L S 0 +librilm-9950 librilm 93 AH B EH T ER B EY S AH S F AO R F R EH N D SH IH P K UH D N AA T B IY 0 +librilm-9951 librilm 112 AH B EH T ER B AE TH HH IY R R IH T ER N D R OW L Z AH N D N AH TH IH NG T UW P EY 0 +librilm-9952 librilm 64 AH B EH T ER B EH D AY N EH V ER HH AE D 0 +librilm-9953 librilm 128 AH B EH T ER B EH D R AA K P R IH N S AH P AH L K AE N HH AA R D L IY B IY IH M AE JH AH N D 0 +librilm-9954 librilm 331 AH B EH T ER B IH G IH N IH NG K UH D N AA T B IY M EY D DH AE N W IH DH DH AH HH IH R OW Z AH V HH OW M S T EH D AH N D IH T IH Z AH S P EH SH L IY F IH T IH NG DH AE T DH AH F ER S T IH M P AH T AH S SH UH D B IY G IH V AH N IH N K AH N EH K SH AH N W IH DH DH IH S HH IH S T ER IY 0 +librilm-9955 librilm 75 AH B EH T ER B IH HH EY V D L AE D D AH Z AH N T S T EH P 0 +librilm-9956 librilm 110 AH B EH T ER B AA D IY AH V HH EH L P ER Z K UH D S K EH R S L IY B IY G AA T AH N T AH G EH DH ER 0 +librilm-9957 librilm 94 AH B EH T ER B UH K F AO R B OY Z HH AE Z N EH V ER B IH N R IH T AH N 0 +librilm-9958 librilm 86 AH B EH T ER B UH K DH AE N AY SH AE L EH V ER R AY T W AA Z DH EH R 0 +librilm-9959 librilm 97 AH B EH T ER B UH K DH AE N DH AH P R IH Z AH N ER AH V Z EH N D AH 0 +librilm-9960 librilm 131 AH B EH T ER B UH K DH AE N DH IH S AA N P EH R IH S IH N T AY M HH AE Z N AA T CH AE N S T IH N AW ER W EY 0 +librilm-9961 librilm 65 AH B EH T ER B AA T AH L DH AE N DH AH F ER S T 0 +librilm-9962 librilm 93 AH B EH T ER B AW N D ER IY W UH D B IY DH AH R IH V ER IH T S EH L F 0 +librilm-9963 librilm 214 AH B EH T ER B OY T UW AE N AW L D F AA DH ER DH AE T S G UH D F AO R N AH TH IH NG N AW IH N DH IH S W ER L D N EH V ER W AA Z P L EY Z Y AO R HH AH N ER 0 +librilm-9964 librilm 249 AH B EH T ER B R EY V ER S OW L JH ER AO R AH M AO R F EY TH F AH L F R EH N D N OW M AE N EH V ER N UW DH AE N CH AA R L Z D IY N T R UW P AH S IH K S TH M IH SH IH G AH 
N K AE V AH L R IY 0 +librilm-9965 librilm 33 AH B EH T ER B R EY K 0 +librilm-9966 librilm 255 AH B EH T ER B R AY T ER B OY N EH V ER D R UW B R EH TH HH IY S ER V D Y UW F EY TH F AH L AE Z DH AH D EY W AA Z L AO NG AH N D Y UW T R IY T AH D HH IH M SH EY M F AH L W ER S AH N AH S L EY V 0 +librilm-9967 librilm 197 AH B EH T ER B R AH DH ER N EH V ER L AY V D B AH T HH IY M EY HH AE V B IH N T UW R EH D IY T UW F AO L IH N W IH DH AH DH ER P IY P AH L Z V Y UW Z 0 +librilm-9968 librilm 187 AH B EH T ER B R AO T AH P B EH T ER D IH S P OW Z D Y UW TH DH AE N Y UW W ER W IH DH AH HH AY ER S EH N S AH V HH AH N ER K UH D N AA T B IY F AW N D 0 +librilm-9969 librilm 97 AH B EH T ER K AE P T AH N D OW N T W AO K DH AH D EH K Y AO R HH AH N ER 0 +librilm-9970 librilm 157 AH B EH T ER K AE P T AH N T UW L EH D AH B EH T ER S OW L JH ER T UW S T R AY K W IH DH DH AH S AO R D AY N EH V ER S AO 0 +librilm-9971 librilm 40 AH B EH T ER CH AE N S 0 +librilm-9972 librilm 144 AH B EH T ER CH AE N S AH W EY T S DH IY SH IY M IY T S DH AH F OW M IY T S W EH N SH AE L SH IY R IH T ER N 0 +librilm-9973 librilm 141 AH B EH T ER CH AE N S F AO R AH SH AA T K UH D HH AA R D L IY HH AE V B IH N AE S K T F AO R 0 +librilm-9974 librilm 561 AH B EH T ER CH AE N S F AO R HH IH Z P AW ER Z AH K ER D IH N DH AH AH S EH M B L IY AH V DH AE T Y IH R IH N K AH N EH K SH AH N W IH DH AH P L ER AE L IH T IY K EY S W EH R DH AH W AH N D ER F AH L D IH S P L EY AH V HH IH Z T AE L AH N T S K AH N T R IH B Y UW T IH D M AH CH T UW DH AH P AE S IH NG AH V AE N EH N AE K T M AH N T DH AE T N OW P R AH F EH S ER SH IH P IH N AH Y UW N AH V ER S AH T IY SH UH D B IY HH EH L D IH N K AH N EH K SH AH N W IH DH AH K AH N T R IY CH AA R JH 0 +librilm-9975 librilm 76 AH B EH T ER CH AE N S F AO R Y UW IH Z K AH M IH NG 0 +librilm-9976 librilm 56 AH B EH T ER CH AE N S M EH R IY 0 +librilm-9977 librilm 154 AH B EH T ER CH AE N S S EH D M IH S T ER HH Y UW M G R IH M L IY DH AE N W IY HH AE V AH V K AE CH IH NG DH AH OW K AA P IY 0 +librilm-9978 librilm 316 AH B EH T ER CH AE N S DH AH B AE L AH W EY N K AH N T IH N Y UW D AH V DH AH F Y UW P L EY S AH Z OW P AH N IH N DH AH AY L AH N D DH AE N IH F HH IY W ER B R AO T AH P AE T DH AH M AE NG K S B AA R OW N L IY W IH CH W UH D K AA S T M IY L EH S DH AE N HH AE F AE Z M AH CH 0 +librilm-9979 librilm 286 AH B EH T ER K EH R IH K T ER HH AE Z B IH N G IH V AH N T UW DH AH R EH G Y AH L ER T R UW P S F AO R DH EH R IH N D EH V ER Z T UW D IH S P ER S DH AH P IY P AH L W IH TH AW T W UW N D IH NG AO R AH DH ER W AY Z IH N JH ER IH NG DH EH M 0 +librilm-9980 librilm 129 AH B EH T ER CH AY L D N EH V ER B R IY DH D S EH D D AO L T AH N D R IH NG K IH NG AO F HH IH Z G L AE S 0 +librilm-9981 librilm 145 AH B EH T ER S IH T AH Z AH N D AH Z N AA T IH G Z IH S T AH N D AW ER F R EH N D SH IH P HH AE Z N EH V ER F AA L T ER D 0 +librilm-9982 librilm 306 AH B EH T ER K L EY M M AY T B IY R EY Z D AH P IH N Y AO R G R EY S IH Z OW N P ER S AH N S EH D DH AH ER L AH V AA K S F ER D IH F Y UW W IH L AH F AO R D M AA R G ER IH T AH V AE N JH UW DH AH S UW K AO R SH IY R IY K W AY ER Z B AY M IY 0 +librilm-9983 librilm 222 AH B EH T ER K L AE S AH V M EH N S IY M T UW B IY JH OY N IH NG DH AH K AH L ER Z DH IY Z D EY Z AH N D DH EY AA R K AO L IH NG DH EH R D IH F AE M ER Z T UW AH S T R IH K T AH K AW N T IH NG 0 +librilm-9984 librilm 341 AH B EH T ER K L AE S AH F AH K EY SH AH N B EY S T AA N M AO R K EH R F AH L S T AH D IY AH V DH AH HH IH S T ER IY AH V DH AH IH NG G L IH SH V ER B D IH V AY 
D Z V ER B Z IH N T UW DH OW Z AH V DH AH W IY K AH N D DH OW Z AH V DH AH S T R AO NG K AA N JH AH G EY SH AH N Z 0 +librilm-9985 librilm 454 AH B EH T ER K L AE S AH F AH K EY SH AH N IH Z IH N T UW DH AH S OW SH AH L IH N K L UW D IH NG G UH D W IH L L AH V AH V R EH P Y AH T EY SH AH N D IH Z AY ER AH V AE M IH T IY R IH L IH JH AH N D IH S OW SH AH L D IH S P L EH ZH ER S EH L F R AH G AA R D IH NG F IH Z IH K AH L D IH Z AY ER P EH K Y UW N IY EH R IY IH N T R AH S T L AH V AH V P AW ER S EH L F P R EH Z ER V EY SH AH N 0 +librilm-9986 librilm 269 AH B EH T ER K AH M IY D IY AH N Y UW M EY B IY B AH T HH IY HH AE Z N AA T Y AO R S K R UW P AH L Z Y AO R S EH N S AH T IH V N AH S AH N D IH Z DH EH R F AO R M AO R D EH K S T ER AH S AE T D R AO IH NG DH AH K R AW D Z AH T EH N SH AH N 0 +librilm-9987 librilm 237 AH B EH T ER K AH M AE N D ER AY D N EH V ER D IH Z AY ER T UW S ER V AH N D HH UW N OW Z B AH T AY M EY HH EH L P T UW S EH T AH P DH AY S T AE N D IH NG R IH G IH NG IH N AH N AH DH ER W ER L D 0 +librilm-9988 librilm 789 AH B EH T ER K AA M EH N T K UH D N AA T B IY M EY D AA N W AH T IH Z R IY K W AY ER D T UW P ER F IH K T M AE N AH N D P L EY S HH IH M IH N DH AE T S UW P IH R IY ER P AH Z IH SH AH N F AO R W IH CH HH IY W AA Z D IH Z AY N D DH AE N B AY DH AH IH N T ER P R IH T EY SH AH N AH V B EY K AH N AH P AA N DH AH L EH JH AH N D Z AH V DH AH S IH R AH N K OW S T W EH N DH AH W AY Z Y UW L IH S IY Z P AE S T S EH Z HH IY HH IY K AA Z D HH IH Z M EH R AH N ER Z T UW S T AA P DH EH R IH R Z W IH DH W AE K S N OW IH NG DH EH R W AA Z IH N DH EH M N OW P AW ER T UW R IH Z IH S T DH AH L UH R AH V DH AE T V AH L AH P CH AH W AH S S AO NG 0 +librilm-9989 librilm 315 AH B EH T ER K AH M P AE N Y AH N DH AE N HH ER W AY T K IH T AH N AO R HH ER F EY V ER IH T N IH R OW AO R IY V IH N HH ER F EY TH F AH L F R EH N D P IY EH R DH AH S EY N T B ER N AA R D AA K Y AH P AY D DH AH AH DH ER V EH L V AH T R AA K IH NG CH EH R 0 +librilm-9990 librilm 155 AH B EH T ER K AH M P EH R AH S AH N IH Z DH AE T W IH CH M IH S T ER G AA S HH AE Z M EY D W IH DH S IH D N IY D OW B AH L Z B AO L D ER 0 +librilm-9991 librilm 227 AH B EH T ER K AH N S EH P SH AH N AH V L AE NG G W AH JH K UH D N AA T HH AE V B IH N F AO R M D IH N P L EY T OW Z EY JH DH AE N DH AE T W IH CH HH IY AE T R IH B Y UW T S T UW S AA K R AH T IY Z 0 +librilm-9992 librilm 130 AH B EH T ER K AH N D IH SH AH N AH V TH IH NG Z N AW P R IY Z EH N T AH D IH T S EH L F 0 +librilm-9993 librilm 99 AH B EH T ER K AH N D AH K T ER SH IY K UH D N AA T HH AE V W IH SH T 0 +librilm-9994 librilm 168 AH B EH T ER K AH N D AH K T ER W UH D B IY DH AH M EH T AH L K AH V ER IH NG AH V DH AH R UW F W EH N S AH CH M AH T IH R IY AH L IH Z Y UW Z D 0 +librilm-9995 librilm 463 AH B EH T ER K AA N S T AH T UW T AH D B OY W UH D S ER T AH N L IY HH AE V P R AA F AH T AH D AH N D ER M AY IH N T EH L AH JH AH N T T UW T ER Z W IH DH DH EH R S AY AH N T IH F IH K AE P ER AE T AH S AH N D W UH D D AW T L AH S HH AE V F AW N D DH AH F AH N AA M AH N AH AH V IH L EH K T R IH S AH T IY AH N D M AE G N AH T IH Z AH M AE Z F AE S AH N EY T IH NG AE Z AY W AA Z EH V ER IY TH ER Z D EY AH SH UH R D DH EY W ER 0 +librilm-9996 librilm 81 AH B EH T ER K AH N T R IY F AO R HH IH M DH AE N DH IH S 0 +librilm-9997 librilm 415 AH B EH T ER K AH N T R IY W AA Z R IY CH T AE Z W IY N IH R D DH AH R IH V ER AH N D IH T W AA Z AH P L EH Z AH N T S AY T T UW S IY DH AH T AH M B AH L IH NG S T R IY M AH V DH AH L EH S ER T UW G EY L AH AH N D T UW F AY N D IH N W 
AH N V AE L IY DH AH P R IY T AH N S AH V AH G AA R D AH N AH N D AH HH AW S AH M AH NG T R IY Z 0 +librilm-9998 librilm 370 AH B EH T ER K AO R S AE T DH AH R IH NG K UH D N AA T B IY R AH N DH AE N S ER JH AO S L IH N HH AE TH P ER F AO R M D N AO R K UH D G R EY T ER V AE N T AH JH B IY G EY N D IH N DH AH JH AW S T S DH AE N HH IY HH AE TH AH B T EY N D OW V ER DH AH M AA R K IY AH V B AH K IH NG HH AE M 0 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/train_exmples.tsv b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/train_exmples.tsv new file mode 100644 index 0000000000000000000000000000000000000000..bbdef25c7cf1c806740740956a25ce2aff41d007 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/fast_phone2unit/train_exmples.tsv @@ -0,0 +1,100 @@ +id speaker n_frames tgt_text duration unit +103-1240-0000 103 704 1 10 4 29 33 14 38 5 25 1 24 19 31 19 40 30 15 10 5 23 19 25 11 1 19 40 31 14 29 30 8 40 11 1 24 19 31 19 40 30 15 10 5 23 19 25 11 1 23 19 37 11 1 21 5 31 33 38 13 30 12 20 4 37 5 25 23 20 24 15 25 30 27 11 1 11 19 29 33 1 11 7 25 19 25 33 36 5 23 19 33 5 23 18 3 23 27 1 16 30 19 25 21 11 38 19 34 6 23 11 14 40 5 25 11 23 15 11 20 40 19 30 11 30 3 29 31 5 25 11 33 30 5 37 14 31 33 9 8 5 9 30 35 22 1 22 6 3 4 2 4 5 4 9 14 5 2 4 2 5 4 4 3 4 6 5 5 4 1 5 3 4 2 4 2 9 6 4 84 4 2 4 4 5 4 3 4 4 6 6 6 7 3 5 3 3 2 4 6 4 5 4 2 3 3 2 5 8 3 2 5 2 7 8 7 7 4 9 5 8 3 3 4 4 5 4 8 3 1 3 4 2 3 5 2 2 2 4 5 4 5 8 13 8 2 3 4 4 2 2 2 2 7 4 4 4 5 1 2 4 3 6 2 5 4 3 3 4 3 4 4 4 2 1 3 2 2 2 2 7 3 4 2 5 4 5 3 5 6 6 17 17 17 296 296 317 317 317 317 317 491 461 461 461 461 461 461 491 491 184 184 184 289 310 107 107 395 351 486 486 460 215 215 35 96 272 300 382 382 245 43 364 276 174 174 174 174 319 282 282 388 303 303 117 404 404 439 439 225 225 225 225 225 225 225 491 391 391 47 491 73 80 289 7 7 217 473 258 258 258 31 342 224 494 494 494 368 281 9 142 142 147 147 329 329 329 329 329 329 36 310 107 395 395 302 302 497 497 251 251 241 241 431 329 432 330 330 388 195 195 64 212 212 131 483 226 226 226 209 356 356 356 356 31 162 68 224 224 494 494 215 129 74 190 190 499 499 499 265 265 85 85 85 85 207 318 185 185 433 433 86 6 6 227 419 417 417 417 237 237 237 237 237 237 237 237 237 237 237 237 362 491 362 305 40 491 305 40 40 362 362 40 40 40 40 40 40 40 40 218 491 218 218 218 491 305 218 491 218 218 218 218 218 218 491 218 435 491 218 491 218 218 218 491 218 218 491 369 491 369 369 369 369 369 21 21 21 21 21 21 21 21 408 408 408 149 228 228 491 289 320 7 473 258 258 258 258 342 342 224 494 494 494 494 31 9 9 142 397 147 147 329 329 329 329 329 143 36 107 107 395 302 302 497 497 251 251 251 241 241 431 278 278 278 278 330 388 388 195 195 195 243 212 131 419 439 225 225 225 80 491 80 7 7 251 241 431 278 278 278 173 173 402 402 401 401 401 401 401 491 310 107 395 395 180 151 151 151 169 150 150 86 86 238 6 272 397 133 345 109 109 109 264 264 313 216 216 22 448 448 448 14 14 14 145 145 145 486 460 460 460 173 280 29 242 242 116 33 250 250 251 241 81 444 324 324 324 324 324 301 339 217 217 217 217 217 473 65 290 290 290 290 290 434 434 339 339 33 250 250 42 42 147 147 380 288 84 496 496 496 496 496 274 274 37 24 131 404 439 78 414 80 80 80 80 80 80 80 401 384 371 278 278 278 215 35 35 96 401 401 401 401 401 401 401 401 401 239 384 371 180 315 315 315 315 315 450 450 413 413 94 199 340 340 33 76 465 377 123 123 123 88 88 44 44 44 251 251 241 431 278 278 285 285 302 302 497 497 497 58 72 72 72 437 481 481 481 481 481 481 175 175 81 84 84 84 496 274 98 98 229 247 247 126 126 126 
326 326 326 326 326 101 101 149 228 491 373 393 234 234 155 190 190 487 288 288 278 330 339 64 64 212 310 447 447 6 272 472 345 333 333 220 220 164 14 14 411 411 284 481 481 481 293 293 122 122 384 300 334 334 304 304 304 49 269 342 168 89 89 89 446 33 33 250 251 251 241 431 470 171 171 171 252 252 325 34 41 324 324 318 368 368 342 9 219 485 286 286 382 382 313 236 239 161 161 79 499 499 405 405 206 215 215 233 270 270 433 342 224 89 89 322 67 394 76 465 161 161 492 492 492 8 8 280 498 498 498 498 498 396 186 39 54 238 6 272 472 336 336 62 62 62 62 62 146 464 44 44 44 8 32 401 354 190 190 380 380 499 496 496 496 178 233 233 458 192 419 427 247 247 15 193 193 17 +103-1240-0001 103 797 1 12 5 33 18 4 11 19 33 31 6 30 31 5 38 15 9 4 22 19 25 12 5 38 35 11 40 5 37 12 20 27 23 11 22 5 34 9 14 33 29 23 15 31 1 19 33 38 5 40 30 19 29 39 36 33 19 11 33 19 9 20 5 25 19 25 33 30 5 22 5 33 18 13 11 23 6 26 9 30 35 22 19 25 19 33 31 1 14 23 20 14 22 6 30 31 34 30 36 12 27 40 38 35 11 40 1 38 19 34 11 3 30 22 31 20 22 30 19 33 31 5 37 29 36 23 5 25 11 22 4 31 22 15 11 1 9 5 33 9 8 12 5 33 8 24 19 33 30 20 10 33 23 19 25 11 40 18 3 23 27 19 33 38 5 40 5 22 38 8 5 33 1 38 13 23 22 5 25 11 5 22 33 5 11 23 19 33 5 23 31 33 30 20 24 1 8 2 2 2 7 3 2 3 4 8 4 5 4 4 5 9 6 4 4 3 3 1 3 7 3 3 3 1 2 3 4 4 2 4 5 3 6 3 3 4 4 2 5 12 29 6 3 3 2 3 2 3 3 2 4 2 3 3 1 2 3 5 1 3 5 4 4 2 1 3 4 3 9 3 4 3 6 6 3 4 2 5 2 2 2 3 2 2 6 3 3 3 6 4 3 4 3 2 2 3 5 5 5 4 4 5 14 4 2 7 4 4 5 4 9 4 4 2 2 3 3 2 3 7 9 3 3 2 2 6 4 6 3 9 5 27 2 6 4 3 5 2 2 6 5 3 2 3 4 4 5 2 4 4 4 2 2 5 4 5 9 5 3 3 2 3 2 6 3 8 3 4 2 5 3 4 3 2 4 3 3 4 2 3 3 3 2 2 2 3 3 4 2 6 6 6 17 17 363 363 51 51 228 320 127 45 45 45 385 131 58 72 72 110 110 110 110 486 460 240 240 325 34 154 154 154 457 478 478 232 232 482 482 172 115 273 273 153 153 153 372 372 396 396 186 186 54 54 172 224 273 255 255 43 364 364 276 109 109 403 403 403 403 403 207 246 324 301 301 129 401 354 354 180 376 376 376 460 178 178 458 192 192 242 340 116 466 466 22 283 455 43 364 364 276 276 153 153 496 496 37 37 24 77 270 342 224 69 69 130 130 198 22 448 448 448 464 180 424 424 424 424 424 274 122 131 472 221 401 82 144 27 437 151 151 169 169 164 164 472 221 401 259 29 380 382 396 313 385 35 472 401 259 74 425 425 386 343 343 343 343 343 358 358 39 39 433 433 160 160 160 112 427 56 56 491 312 312 341 341 341 341 341 341 12 12 12 21 21 21 21 21 21 21 21 21 408 408 408 408 391 391 228 491 491 412 177 177 177 177 177 131 133 345 141 141 141 281 453 142 397 456 456 456 456 129 259 74 485 485 485 485 374 374 325 449 449 191 191 191 314 314 36 377 87 87 8 8 420 420 420 324 464 44 44 44 94 335 335 411 411 188 121 121 33 64 76 465 465 161 161 487 469 469 143 458 192 192 278 278 278 37 314 131 472 72 72 72 72 72 72 110 110 443 120 240 314 314 26 26 26 251 241 431 235 235 235 235 235 413 200 200 248 248 248 212 354 190 380 380 499 496 496 496 178 233 458 192 192 340 340 340 94 199 154 154 77 342 342 142 14 411 498 498 498 498 498 134 175 81 166 324 324 464 382 382 245 129 458 208 208 441 441 441 153 153 372 372 396 186 186 323 323 238 6 272 377 487 487 374 313 216 216 114 124 124 124 274 274 368 269 9 142 397 336 276 109 109 496 496 496 37 37 37 24 270 270 433 160 427 229 247 247 126 126 326 326 326 326 326 101 101 149 149 228 228 491 345 333 333 333 220 220 164 402 221 401 401 401 491 384 371 180 106 306 306 306 306 396 396 178 178 35 458 96 96 66 66 68 68 68 68 115 115 444 213 213 213 143 458 208 208 487 487 288 277 385 143 270 270 342 224 69 462 462 130 402 402 401 401 491 74 190 441 441 441 153 153 182 
182 182 182 182 497 175 175 81 89 89 446 116 33 131 472 221 458 445 445 351 351 486 486 460 460 169 150 342 342 86 105 336 445 445 470 403 403 171 171 171 246 246 252 24 131 404 439 78 170 305 491 28 28 28 491 491 491 2 201 305 305 491 305 305 2 316 316 316 316 316 491 491 289 289 289 320 354 159 159 159 159 159 240 35 131 472 221 336 354 62 62 62 62 62 438 216 22 283 455 236 108 119 119 103 103 103 103 103 85 299 203 53 473 177 177 143 131 133 133 147 380 288 213 213 213 252 143 310 447 447 447 26 26 251 251 241 81 329 329 329 330 388 195 195 471 471 49 453 142 58 72 72 437 437 481 481 481 481 293 175 175 81 84 84 84 84 84 16 274 274 98 483 483 440 188 177 177 177 131 133 133 345 141 141 141 281 9 168 44 44 143 458 208 208 441 441 441 346 346 265 265 85 85 85 146 146 277 277 277 385 385 227 419 225 225 226 197 7 364 276 109 109 139 139 293 293 122 143 458 144 27 27 121 116 33 33 212 239 371 180 151 151 151 178 35 96 96 36 272 191 191 191 37 314 26 251 241 431 431 278 285 285 302 302 497 497 186 162 482 482 338 238 161 79 487 288 288 360 360 434 434 434 203 381 381 404 13 491 247 15 193 193 193 17 +103-1240-0002 103 697 1 16 6 30 25 3 33 20 37 19 25 5 9 30 35 22 1 22 35 11 30 5 25 29 4 31 33 24 19 31 19 40 30 15 10 5 23 19 25 11 40 11 6 30 38 19 12 7 33 11 39 36 30 19 17 3 30 11 16 14 11 20 31 5 25 31 20 4 25 11 19 22 6 30 5 24 1 19 33 29 30 3 9 5 9 23 20 38 5 40 22 3 25 32 5 31 12 5 33 24 19 31 19 40 30 15 10 5 23 38 5 40 31 19 33 19 26 4 33 18 14 38 19 25 11 27 1 22 20 29 19 26 5 32 3 30 29 8 3 25 13 37 30 20 34 19 26 12 5 33 29 4 31 33 1 16 14 24 9 30 35 22 31 5 25 11 10 19 23 11 30 5 25 5 29 1 8 6 2 3 2 6 2 6 3 1 3 3 5 5 3 10 4 4 2 4 4 3 5 5 7 4 2 2 3 3 3 4 3 3 3 3 7 3 4 3 3 3 3 3 3 2 3 6 2 2 3 4 4 2 3 6 4 3 3 4 4 5 4 3 5 5 7 3 2 4 2 6 4 5 2 6 32 6 3 3 3 3 2 3 3 3 4 3 2 3 6 4 4 4 5 5 2 2 4 2 3 4 3 5 4 4 4 4 5 3 2 2 5 1 3 2 5 2 3 4 3 4 2 4 2 9 11 4 4 3 2 4 2 9 4 3 3 11 5 4 4 5 3 3 4 2 4 2 2 2 7 9 6 4 9 3 4 3 2 2 3 3 3 2 2 2 3 2 3 3 2 1 4 8 6 9 17 17 17 363 363 51 51 228 491 373 155 155 155 148 148 387 372 313 10 479 479 307 307 307 307 61 167 449 449 34 357 357 357 357 357 173 280 29 242 116 94 199 44 44 44 8 129 401 259 354 190 190 380 380 499 496 496 496 167 233 233 144 192 419 419 439 225 225 225 80 80 491 491 144 389 389 389 389 389 133 133 42 147 147 380 499 319 319 319 348 348 195 394 90 76 74 74 437 311 311 311 311 311 311 460 169 150 342 86 6 6 196 217 473 258 258 258 31 342 224 494 494 368 281 9 142 397 147 329 329 329 329 329 36 310 107 302 302 302 497 497 251 251 251 241 431 329 329 330 116 33 195 195 471 471 49 269 142 238 6 272 106 153 153 372 372 372 245 43 345 333 333 220 220 216 180 113 113 113 113 167 167 236 239 401 384 219 485 485 374 374 132 132 42 147 456 456 456 456 416 144 27 106 306 306 306 306 306 306 396 313 24 24 131 472 393 155 332 332 332 313 236 239 239 384 371 213 213 213 252 186 39 342 342 11 11 11 379 379 379 394 76 478 66 68 68 115 267 41 41 41 246 3 464 464 89 194 446 446 446 64 212 239 384 490 490 143 458 144 208 441 441 153 153 153 372 372 372 467 467 467 275 203 381 381 48 404 13 491 491 312 312 312 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 408 408 408 408 149 149 228 491 289 412 177 177 177 177 177 35 401 259 74 190 190 488 488 488 488 488 8 29 134 134 134 134 8 8 359 359 166 166 166 324 301 378 345 141 141 141 281 9 142 221 144 27 27 437 370 370 370 370 370 370 348 64 76 310 107 395 395 459 459 459 271 271 39 86 86 238 198 45 45 45 45 45 35 401 196 217 473 258 258 258 31 342 342 224 494 494 494 368 453 9 142 397 147 147 329 
329 329 329 329 329 143 310 107 107 395 302 302 302 375 497 497 497 43 364 345 141 141 141 31 162 232 68 172 115 470 278 278 325 34 176 135 135 200 200 464 415 415 415 415 415 131 183 156 156 156 156 245 43 364 364 109 109 278 278 116 33 64 212 212 34 84 84 84 84 274 274 98 229 229 247 126 126 326 326 101 408 408 228 491 491 491 445 445 213 213 213 213 252 215 458 176 176 135 135 200 200 44 44 44 44 99 338 338 338 338 338 395 106 306 306 306 306 396 396 215 35 35 335 145 284 265 265 265 85 85 85 146 464 464 106 125 125 125 125 348 94 335 14 411 204 204 204 204 204 204 204 29 29 337 337 324 422 422 164 164 164 214 214 214 328 200 248 466 114 114 45 45 385 90 401 82 74 119 311 311 311 311 311 311 311 311 311 282 169 169 150 39 433 86 238 6 75 227 419 225 225 225 225 225 491 373 305 80 289 491 155 165 165 165 165 165 203 53 212 239 190 380 380 496 496 178 35 96 270 342 342 224 89 89 446 33 394 310 107 395 395 106 139 424 387 122 122 122 300 300 242 242 116 94 335 335 411 230 230 230 230 230 230 215 215 233 233 419 427 229 247 247 126 126 193 193 193 193 17 +103-1240-0003 103 735 1 4 25 11 12 4 33 19 16 32 20 25 27 33 5 31 33 13 25 20 34 19 26 1 3 11 1 6 30 7 33 5 37 29 23 15 31 1 32 20 38 35 11 25 13 37 14 30 13 31 33 5 25 33 19 23 32 20 18 4 11 16 13 30 5 33 19 11 7 33 12 5 38 8 40 5 25 11 38 13 30 16 6 30 40 12 13 30 5 37 1 12 13 30 3 30 29 23 13 25 33 20 5 37 29 20 29 5 23 19 25 4 37 5 25 23 20 4 25 11 7 33 5 37 19 33 1 18 36 22 5 25 5 33 13 25 11 22 23 27 31 23 20 33 19 12 13 30 25 15 9 14 40 9 19 40 25 19 31 9 8 11 19 25 33 5 37 25 19 17 23 13 22 33 19 26 12 13 30 27 25 1 8 7 2 1 2 4 2 4 3 3 2 5 4 2 3 4 4 3 4 3 3 2 3 2 11 2 2 4 4 6 3 2 2 7 2 9 7 14 7 3 3 2 3 4 3 3 4 5 4 8 3 3 3 2 3 2 5 3 2 1 4 4 2 4 3 2 2 3 7 4 2 3 6 11 5 2 2 4 3 3 4 6 3 3 4 3 2 8 5 9 61 3 2 1 2 3 5 2 2 3 2 3 1 3 3 5 2 2 4 5 4 5 2 3 5 3 9 5 2 4 7 2 2 3 4 5 13 4 2 3 3 2 3 5 4 3 4 5 3 4 6 2 2 4 1 2 2 3 3 5 4 2 5 3 2 5 3 4 3 4 6 4 3 3 3 2 2 3 2 3 2 3 3 3 1 3 3 2 5 6 5 12 17 17 17 363 363 51 149 228 228 209 83 194 194 194 322 322 67 212 127 45 45 45 45 240 240 325 118 118 118 118 118 402 338 400 400 400 30 301 301 10 479 331 84 84 496 274 252 36 449 459 459 459 31 342 86 86 6 272 483 483 411 475 475 475 475 475 475 475 475 349 164 164 214 214 214 214 200 248 14 14 411 287 284 284 284 426 426 426 206 206 206 24 335 335 226 157 157 157 157 157 245 14 14 411 145 113 113 113 113 285 285 34 462 462 130 402 401 401 491 74 425 425 386 386 431 343 343 343 343 358 358 358 358 358 39 433 433 433 160 427 247 247 247 126 126 292 326 326 326 326 326 408 408 149 228 491 373 338 338 400 400 400 400 301 378 43 345 389 389 389 314 314 196 309 309 479 331 463 463 463 463 280 29 382 245 245 42 42 147 380 380 288 443 443 120 169 169 150 39 433 86 86 86 6 6 272 34 89 319 319 348 394 76 108 377 139 139 139 139 293 186 99 338 400 400 400 30 3 58 254 254 254 314 131 393 234 234 261 25 470 264 264 468 468 468 396 313 143 449 449 191 191 191 325 180 180 113 113 113 113 113 167 314 314 401 401 198 22 283 455 455 43 364 364 276 346 346 346 265 265 265 265 85 85 85 146 146 318 318 368 453 342 168 89 89 446 116 212 131 133 43 364 276 109 109 264 264 264 468 245 245 349 234 234 155 25 148 148 148 372 372 304 304 49 9 9 221 198 127 114 114 264 264 468 406 406 467 467 106 284 284 426 426 206 206 37 173 352 352 352 352 419 439 439 237 237 237 491 491 491 28 491 491 491 491 341 341 341 341 341 341 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 260 491 163 163 163 163 163 163 316 491 316 316 
73 289 289 289 127 114 0 222 468 353 353 353 353 215 35 259 74 425 425 386 431 432 330 330 348 76 449 41 324 464 462 462 462 402 221 259 74 351 213 213 213 252 252 129 354 100 100 100 497 497 335 335 440 188 188 340 116 94 199 145 145 145 486 460 460 173 280 29 242 242 116 33 250 250 359 359 474 474 474 324 246 19 3 3 14 14 209 145 194 194 446 446 388 64 212 335 14 411 145 145 113 113 113 450 413 285 34 223 223 223 280 280 277 277 277 277 277 233 75 227 427 229 247 312 292 292 326 326 101 101 149 391 228 491 491 373 489 489 489 489 143 458 144 389 389 389 94 199 255 255 236 36 108 119 119 351 432 432 432 330 388 195 64 131 472 472 221 458 144 208 208 425 386 386 496 496 496 274 186 186 54 54 86 26 26 474 474 166 301 143 36 377 123 123 123 114 222 222 222 222 313 10 10 479 398 290 171 171 252 215 458 29 382 382 304 368 269 342 142 221 336 354 278 278 278 368 453 342 86 196 196 94 459 459 459 271 31 342 342 221 221 336 354 62 62 62 62 62 438 438 143 36 384 371 371 278 278 330 33 64 76 108 449 69 223 130 402 196 479 331 255 154 416 458 208 386 431 151 151 151 178 458 96 36 272 176 135 135 200 248 248 127 114 222 222 222 406 406 467 467 350 350 350 350 350 350 413 413 303 48 404 13 229 491 491 312 15 15 15 193 193 193 17 +103-1240-0004 103 625 1 9 5 33 24 19 31 19 40 30 15 10 5 23 19 25 11 38 5 40 38 5 25 5 37 12 27 40 22 15 29 5 9 5 23 22 30 20 10 14 40 18 36 22 5 25 24 4 25 19 21 12 13 30 27 25 22 5 25 31 14 25 40 4 25 11 12 27 40 5 37 5 12 14 16 27 22 31 19 25 33 5 12 5 9 3 30 17 5 25 1 32 20 38 5 40 5 25 27 33 5 9 5 23 18 7 31 38 8 16 1 18 14 38 14 22 38 5 40 6 23 38 20 40 11 5 25 4 25 11 38 13 23 11 5 25 1 32 20 30 4 25 1 12 5 31 27 19 26 31 14 22 5 23 1 13 3 3 3 2 3 4 2 5 3 3 4 4 6 5 3 3 3 2 3 3 2 2 3 2 2 5 3 8 4 2 2 4 2 4 6 3 4 6 4 4 2 2 3 3 3 3 3 2 4 5 2 2 4 7 2 4 2 3 6 5 4 3 3 3 1 3 5 4 2 4 3 3 2 5 6 3 3 2 2 2 2 1 2 3 3 4 3 2 6 37 6 3 3 1 3 3 3 3 2 2 1 2 3 4 4 6 3 5 9 7 4 4 6 4 4 2 2 3 4 4 4 3 4 3 6 7 8 2 3 4 4 5 4 4 11 7 8 9 7 10 7 1 2 4 5 4 3 4 4 4 4 2 6 7 17 17 17 296 363 363 51 51 51 491 491 491 491 320 320 159 159 159 159 314 35 196 196 473 258 258 258 31 342 224 494 494 494 368 453 142 142 397 147 380 329 329 329 329 143 36 310 107 395 134 302 302 497 497 251 251 251 241 431 278 278 278 330 388 195 64 212 131 133 133 141 141 141 281 453 142 221 336 174 174 174 174 348 199 223 223 223 130 198 198 124 124 124 124 124 368 31 342 86 221 221 336 445 445 445 351 351 171 171 171 252 215 29 134 134 134 8 259 354 100 100 497 497 497 122 129 259 144 208 208 190 487 487 213 213 213 252 143 36 310 107 395 334 334 334 304 304 185 49 269 342 224 224 489 489 489 143 144 27 389 389 116 33 250 217 217 473 365 365 365 330 94 199 469 469 469 24 36 310 447 447 447 6 127 222 222 222 245 245 14 411 411 350 350 350 350 413 64 394 465 465 27 27 121 116 33 394 478 478 232 172 224 273 470 498 308 308 467 299 388 379 471 471 49 342 168 89 194 194 446 322 64 212 198 114 114 84 496 496 274 318 49 269 342 224 69 462 130 129 402 106 493 493 493 216 300 300 382 245 349 205 261 261 25 496 496 496 496 274 274 233 96 270 433 342 168 340 340 116 33 36 377 123 123 216 283 455 8 354 106 306 306 306 306 396 396 416 416 192 192 275 275 116 303 303 48 48 229 170 491 491 312 312 312 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 260 408 408 408 391 391 391 228 491 491 373 338 400 400 400 30 378 378 141 141 141 281 342 342 44 44 44 94 331 84 84 496 496 285 449 134 134 8 100 100 497 497 58 72 72 268 268 268 268 268 169 186 39 54 142 397 397 276 346 346 346 428 85 146 146 358 352 352 352 352 352 
352 417 417 417 417 237 491 435 225 225 225 72 156 156 156 156 245 245 43 364 276 276 109 109 498 396 396 178 143 259 458 208 345 141 141 281 281 9 168 106 297 297 297 297 297 297 43 43 345 109 109 171 171 368 368 342 342 221 336 371 180 180 319 319 319 319 282 388 195 195 195 117 404 335 440 209 83 194 194 194 194 446 446 64 212 131 133 364 364 276 109 109 443 139 139 139 293 293 497 122 239 36 371 180 319 319 319 319 282 303 303 303 303 117 404 439 439 439 78 237 47 47 47 80 491 80 491 373 373 338 338 338 400 400 400 30 30 246 246 246 3 3 197 197 7 42 147 147 147 380 210 210 210 210 486 365 282 282 282 388 388 195 195 199 404 404 197 197 216 22 283 283 455 38 162 482 115 273 273 84 496 88 88 176 176 176 328 200 248 478 66 172 115 273 498 498 498 245 143 458 458 302 302 302 302 375 98 98 229 247 247 15 15 193 193 193 17 +103-1240-0005 103 758 1 18 13 23 29 33 30 5 25 12 5 31 5 25 11 15 31 22 36 23 4 25 11 38 5 40 12 5 31 33 30 6 26 17 5 31 33 29 30 3 29 5 37 1 12 5 10 14 10 1 15 11 31 5 31 8 5 33 20 4 25 11 16 6 30 5 25 24 19 32 5 25 40 3 17 40 19 23 39 14 20 1 39 13 33 38 19 12 6 23 12 19 31 1 24 19 31 19 40 30 15 10 5 23 16 7 25 11 1 5 9 5 25 11 5 25 33 8 24 33 5 31 19 33 16 14 7 14 40 4 33 18 14 22 19 10 5 25 38 19 25 11 27 1 25 19 33 19 26 22 3 33 5 25 38 6 30 29 22 38 19 23 33 31 1 32 20 18 4 11 25 19 33 19 11 31 19 22 31 33 20 25 5 37 12 13 24 1 9 5 2 4 3 3 5 4 4 2 2 6 3 3 3 5 6 2 7 5 6 2 3 3 2 3 2 3 4 4 2 3 3 3 2 4 3 4 4 4 4 1 2 2 1 3 7 5 5 2 7 4 3 2 6 7 3 3 6 5 2 2 6 5 3 2 2 3 2 6 2 5 2 4 4 4 2 4 2 5 9 30 4 2 4 2 2 3 5 6 3 7 11 2 5 2 5 2 5 4 3 3 3 3 5 6 2 2 1 4 6 3 7 2 4 2 10 7 2 3 2 5 2 3 4 6 10 4 4 2 2 3 3 4 2 6 2 3 4 2 4 2 9 14 4 3 3 3 7 9 4 3 1 5 4 3 3 3 8 3 2 6 2 9 12 7 3 2 2 2 2 4 2 3 4 4 2 3 4 4 3 2 3 3 1 4 5 7 17 17 17 363 363 363 51 51 228 491 373 72 110 110 139 139 139 293 293 215 35 96 96 6 472 472 133 42 147 380 499 499 319 319 319 348 195 195 466 22 283 283 38 162 68 68 68 273 273 319 319 319 348 33 64 212 212 93 93 93 93 171 422 186 39 86 86 105 105 336 208 153 153 153 153 182 182 375 375 497 98 98 483 440 83 83 55 55 55 322 67 212 131 133 345 141 141 141 141 281 9 198 198 22 283 455 38 162 482 482 482 238 6 161 161 499 499 235 235 235 235 348 64 212 459 459 459 459 31 54 86 6 272 472 221 336 259 190 190 190 488 499 499 405 405 206 215 215 35 29 69 69 223 130 198 198 22 283 455 236 129 36 310 107 395 395 487 498 498 498 396 178 36 310 107 447 483 226 226 209 411 171 171 171 171 252 252 143 77 478 342 224 494 494 494 31 342 342 115 273 470 265 265 265 85 85 85 146 469 469 469 36 449 41 41 41 324 324 3 335 440 145 194 194 446 446 67 76 90 393 393 234 261 25 148 148 148 148 372 372 467 467 467 242 116 33 250 217 217 473 473 278 278 99 436 436 60 60 298 379 379 195 471 471 49 49 168 106 106 405 167 215 35 458 96 368 453 453 371 278 278 139 175 81 324 324 219 495 495 495 495 467 41 41 41 41 19 454 454 229 491 491 312 312 312 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 408 408 408 149 228 491 491 320 219 357 357 357 240 240 131 133 133 345 333 333 220 220 216 106 106 297 297 297 297 297 297 293 293 293 122 4 127 114 258 258 258 258 258 271 271 39 433 433 433 433 160 160 160 97 97 225 225 225 225 7 217 473 258 258 258 258 342 342 224 494 494 494 368 9 142 142 397 147 380 329 329 329 329 329 310 107 107 395 302 302 497 497 349 349 205 261 261 25 315 315 315 450 450 413 413 212 131 335 226 209 44 44 44 236 8 32 259 354 180 319 319 319 348 348 64 64 64 64 212 384 34 11 11 11 116 33 243 243 401 401 491 108 119 119 437 103 103 103 103 103 85 85 299 299 339 64 76 
36 87 87 66 66 68 68 115 278 278 278 36 131 393 393 155 155 332 332 332 14 14 14 411 145 145 284 315 315 450 450 153 88 372 372 304 304 185 49 453 168 415 415 415 415 58 183 156 156 156 313 143 458 458 445 351 278 278 278 36 310 107 107 395 242 116 116 250 250 250 276 109 109 278 330 116 33 64 212 212 371 84 84 84 84 274 274 263 229 247 247 126 126 326 326 326 326 326 101 101 149 228 228 289 320 309 309 479 278 278 278 325 449 449 176 176 135 328 200 200 195 248 197 197 197 401 491 144 27 27 437 437 405 405 405 206 167 35 35 242 242 242 33 33 33 250 250 364 276 276 153 153 372 372 372 215 215 35 472 472 221 401 491 208 208 441 441 441 441 109 278 139 139 139 375 375 375 233 233 270 270 433 433 160 160 18 112 112 439 439 439 225 237 237 237 47 491 47 491 491 73 491 373 373 338 400 400 400 30 30 58 110 254 254 254 314 196 196 479 331 278 278 278 325 449 191 191 191 314 314 478 478 68 68 115 273 278 278 143 96 96 232 68 68 68 6 272 371 444 360 360 252 339 199 199 223 223 130 402 198 198 114 57 57 57 203 381 381 48 229 491 247 15 193 193 193 17 +103-1240-0006 103 478 1 4 40 4 37 5 25 23 20 18 7 31 22 20 29 14 40 38 14 38 27 25 33 19 33 13 23 19 25 1 6 11 37 28 31 5 40 1 4 25 11 22 20 29 19 26 5 32 3 30 29 8 3 25 12 5 24 15 25 30 27 11 12 5 33 22 30 6 31 33 12 5 18 3 23 27 1 5 25 11 38 7 25 11 5 29 12 5 31 33 20 29 30 13 11 18 19 23 9 20 6 25 11 1 8 7 4 5 2 3 2 3 4 5 4 4 4 4 3 4 2 4 2 4 5 2 3 3 6 4 5 3 4 1 13 2 5 6 6 5 7 16 5 2 2 5 5 2 3 4 2 8 4 3 3 12 4 3 2 1 5 8 6 6 7 4 1 2 2 5 3 3 6 4 1 2 6 2 5 8 12 6 1 4 5 10 2 3 3 5 2 3 6 4 4 3 7 3 3 7 3 5 4 4 8 4 7 20 17 17 17 363 51 51 228 491 412 83 145 253 253 253 253 368 342 168 168 145 145 486 460 460 173 280 29 242 242 242 359 359 359 81 324 324 324 3 58 72 268 268 268 268 268 268 274 186 39 54 86 105 336 445 485 485 213 485 215 129 354 29 334 304 304 185 131 397 397 345 347 347 347 347 43 43 364 276 174 174 426 426 206 167 457 76 36 377 87 87 87 236 259 108 119 119 351 351 443 139 139 139 293 175 175 81 89 340 340 116 33 335 14 14 491 411 411 284 284 284 405 405 405 206 206 206 37 24 131 133 4 4 280 153 153 343 343 343 343 358 358 39 342 342 224 50 50 50 50 50 50 185 269 433 160 112 427 82 247 312 126 292 292 292 326 326 326 326 326 101 408 149 149 491 412 412 55 55 55 322 67 131 472 221 458 445 445 213 213 213 213 252 215 458 176 176 135 135 135 200 200 44 44 44 44 99 338 338 338 338 395 273 106 306 306 306 396 396 215 35 35 335 14 14 411 284 265 265 265 265 85 85 146 464 464 125 125 125 125 466 466 22 283 455 399 217 217 217 473 290 290 290 290 290 434 434 434 339 339 33 90 42 42 147 147 380 380 288 496 496 496 496 274 274 274 24 131 472 198 198 127 45 45 385 90 221 458 208 208 190 499 499 499 405 405 206 150 150 54 86 238 6 6 472 472 198 22 283 455 38 72 72 437 437 481 481 481 481 175 175 81 84 84 84 84 274 274 98 229 247 247 126 126 326 326 326 326 326 101 149 149 228 491 83 83 55 55 322 67 67 131 133 133 364 276 276 346 346 486 315 315 315 315 450 450 450 413 413 348 64 212 131 230 230 230 230 230 35 35 401 198 198 22 283 455 38 162 232 232 232 68 68 6 371 371 213 213 213 252 215 129 259 29 29 42 42 42 147 380 288 443 443 443 240 314 131 183 183 183 183 183 278 278 278 139 139 139 497 497 497 497 122 259 259 354 420 420 324 464 180 180 426 426 426 426 426 282 388 303 303 64 212 465 227 419 439 78 491 305 421 491 491 491 421 491 421 491 491 491 128 128 128 491 128 193 193 193 17 +103-1240-0007 103 751 1 13 25 20 9 5 11 20 18 36 38 13 25 33 7 33 5 37 19 33 1 6 30 19 25 33 36 19 33 1 18 4 11 33 19 29 4 31 27 37 14 12 4 33 18 19 23 30 27 11 1 4 25 
11 31 27 1 30 5 25 12 20 5 25 31 20 25 17 6 25 33 23 5 33 5 37 24 19 31 19 40 30 15 10 5 23 40 6 23 31 20 19 26 8 1 32 20 38 5 40 31 19 33 19 26 12 13 30 38 5 25 4 16 33 14 25 36 25 19 25 1 14 23 20 21 36 25 1 12 5 31 5 25 38 5 40 22 5 24 19 26 19 25 4 33 12 5 38 19 25 11 27 1 38 6 30 24 5 25 11 9 30 8 33 1 11 6 3 4 2 4 2 4 4 3 5 3 3 3 6 3 2 3 6 7 2 6 5 3 5 4 4 5 4 11 6 1 3 2 3 5 7 5 5 2 4 2 4 3 6 2 8 5 9 6 2 4 3 3 6 14 15 5 3 4 3 3 6 6 7 5 5 3 5 4 3 2 4 2 2 4 4 2 4 3 4 5 4 4 4 3 5 5 7 7 7 2 4 14 50 6 3 2 2 3 3 2 2 3 4 2 3 4 4 2 4 3 4 2 3 3 5 2 3 3 1 7 3 5 6 7 8 22 4 3 6 5 3 2 2 3 5 3 2 2 5 3 2 1 3 2 3 3 3 4 3 9 1 4 4 4 4 1 2 3 3 3 7 6 10 17 17 17 363 363 363 363 51 149 228 491 491 411 145 475 475 475 475 94 475 475 475 324 301 8 354 106 493 151 240 325 41 41 324 324 3 183 183 489 489 489 489 489 43 43 276 109 109 443 330 330 348 64 76 465 449 483 145 113 113 113 113 113 240 285 285 34 223 223 130 280 277 277 277 277 277 385 36 36 227 419 225 225 226 226 226 491 209 157 157 157 157 157 372 335 14 14 411 188 340 340 116 33 64 394 465 108 377 123 123 123 88 88 277 277 277 277 385 24 131 427 229 247 126 126 126 326 326 326 101 408 149 491 228 373 110 110 110 254 254 240 314 35 108 377 87 87 87 129 259 74 311 311 311 311 311 311 311 311 169 150 342 342 342 168 106 410 410 410 410 410 29 29 382 313 216 216 114 92 92 92 92 92 385 131 472 183 183 183 351 278 278 139 139 139 497 497 497 497 42 42 8 147 380 380 499 84 496 496 496 496 274 274 274 37 24 131 419 419 225 225 225 225 82 83 55 55 55 322 67 394 478 478 232 232 172 172 115 273 84 84 84 84 16 16 16 274 274 274 98 13 229 247 312 126 126 23 23 23 101 101 101 149 149 228 491 289 289 7 147 147 380 499 319 319 319 348 466 466 466 212 22 448 448 448 14 14 145 319 319 319 319 348 195 195 195 394 478 478 232 68 68 68 267 267 267 267 267 434 339 339 33 90 90 32 465 144 27 180 284 405 426 426 413 348 64 76 26 26 26 359 81 81 277 277 385 325 34 69 223 130 130 402 196 196 217 473 473 258 258 31 342 224 494 494 494 494 368 9 142 142 42 42 147 380 329 329 329 329 252 143 36 107 107 395 302 302 302 497 497 185 269 9 9 483 14 411 411 297 297 297 297 297 297 293 293 497 186 162 68 68 172 115 267 267 267 267 360 360 176 176 176 135 328 328 200 199 106 106 265 265 265 265 85 85 85 85 207 207 19 454 13 417 417 417 237 237 170 28 28 28 28 28 362 491 491 362 362 362 362 491 491 362 211 491 491 369 369 369 369 21 21 21 21 21 21 21 21 21 21 21 21 21 21 408 408 408 408 391 391 73 491 289 373 338 338 400 400 400 301 378 378 141 141 141 281 162 68 68 115 470 278 278 278 449 176 176 135 135 200 248 248 127 114 264 264 264 468 245 245 43 364 276 174 174 174 174 348 94 199 145 145 460 460 460 402 402 6 272 300 469 313 313 10 479 398 398 374 374 132 413 339 199 199 340 116 116 199 335 14 411 411 498 498 498 498 134 175 359 81 166 324 324 422 236 36 310 107 395 395 485 374 374 374 374 132 132 413 303 303 303 117 404 439 439 78 237 237 237 47 47 491 47 2 491 2 2 2 2 316 316 491 316 316 491 491 73 435 289 7 127 5 5 5 38 162 68 68 68 115 273 319 319 319 319 348 348 195 250 133 141 141 141 281 342 86 221 458 144 27 351 319 319 319 53 53 176 135 135 200 200 200 464 340 340 340 94 199 415 415 415 35 198 22 283 455 455 364 345 109 278 278 330 116 33 64 212 212 371 84 84 84 84 274 274 274 274 43 43 401 364 276 276 153 153 153 387 387 372 372 396 396 203 53 473 89 446 446 67 131 472 221 401 354 190 380 380 499 499 428 428 146 146 358 358 233 233 227 419 419 427 56 491 421 15 15 15 193 193 193 17 +103-1240-0008 103 771 1 12 20 6 30 10 14 11 3 25 12 5 31 23 27 29 9 19 23 27 12 5 18 
7 31 38 5 40 19 25 5 9 30 8 11 5 23 16 23 5 32 5 37 29 19 26 22 20 38 8 33 9 23 36 24 1 18 5 24 11 27 37 14 9 8 5 24 19 30 20 5 11 5 37 1 9 20 40 1 33 3 24 5 31 23 19 25 11 1 5 24 20 22 23 19 33 5 23 24 4 25 18 36 24 4 37 5 25 23 20 29 20 29 5 23 22 6 23 11 30 15 10 5 23 19 25 11 40 18 5 40 9 5 25 11 1 38 5 40 31 27 19 26 18 19 40 23 15 33 14 25 5 29 31 20 11 3 25 12 5 18 19 23 16 20 23 11 1 9 20 6 25 11 12 5 9 3 30 25 1 11 4 3 6 3 5 2 3 3 3 2 2 7 2 5 3 2 3 6 7 1 3 6 9 6 2 2 3 2 2 2 4 4 5 2 2 3 9 2 5 7 2 4 6 2 5 4 4 5 5 6 2 4 7 6 14 6 2 8 3 5 4 3 4 6 2 5 3 5 4 2 2 2 4 1 3 11 10 25 5 4 3 3 5 3 5 5 10 9 6 5 4 4 4 2 2 3 3 5 7 3 4 2 3 7 2 3 2 2 5 5 5 3 3 3 5 3 5 5 3 3 4 3 8 4 6 2 2 4 4 4 3 2 4 5 19 3 1 4 4 5 3 3 3 3 3 5 4 9 3 3 3 3 6 4 3 3 3 1 2 5 2 5 6 4 5 3 1 3 4 6 4 2 1 2 4 5 4 6 9 17 17 17 363 363 363 363 51 51 51 228 491 320 127 448 448 448 14 14 411 153 153 387 372 372 396 313 35 310 107 395 382 382 313 313 285 34 125 125 125 125 348 466 22 283 283 38 162 232 232 232 26 26 26 431 431 84 496 496 274 274 457 457 401 401 354 354 255 255 251 251 251 241 431 84 84 84 16 16 274 274 216 216 283 283 455 58 72 72 72 268 268 268 268 268 268 450 450 274 271 271 39 39 86 142 397 336 345 141 141 281 281 342 168 340 340 116 199 44 44 44 129 259 190 190 380 380 499 499 428 85 146 146 285 34 302 302 497 497 349 349 234 234 234 234 234 261 425 425 386 431 151 151 151 169 169 169 99 436 338 338 447 395 69 462 462 402 402 221 401 259 491 74 351 351 360 360 360 200 200 248 76 76 465 445 485 324 324 324 301 378 364 364 346 346 346 428 428 146 146 143 36 472 221 401 401 259 354 425 425 241 431 374 374 374 374 132 132 132 203 381 381 404 13 491 247 312 126 126 326 326 326 326 101 101 101 149 228 491 491 373 72 72 437 284 319 319 319 203 53 53 53 53 469 212 212 131 34 410 410 410 410 410 173 280 29 29 382 245 245 8 259 354 62 62 62 62 146 464 464 44 44 399 217 217 217 473 286 286 286 468 468 406 337 337 337 324 464 464 277 277 325 34 462 462 462 402 402 221 401 401 354 213 213 213 213 213 246 246 246 246 318 318 185 185 433 433 433 160 160 112 112 78 56 491 491 28 28 491 491 341 341 341 341 12 12 12 12 12 260 260 260 260 391 391 391 73 289 491 289 108 119 437 437 284 284 426 426 203 53 473 459 271 31 39 342 342 26 26 251 251 241 81 329 120 120 330 388 195 195 195 64 212 131 419 439 439 439 439 225 225 225 237 47 491 47 80 80 491 80 197 225 287 287 44 44 44 399 217 217 473 398 213 213 213 143 143 458 144 26 26 251 241 431 278 278 285 449 302 302 497 497 399 399 217 217 473 136 136 136 136 136 136 136 282 282 388 195 404 58 489 489 489 489 489 399 53 335 14 145 145 145 486 460 460 173 280 29 242 242 116 250 359 359 81 324 324 324 422 129 259 74 485 213 213 213 213 252 215 129 259 354 100 100 100 497 497 122 143 458 144 27 437 481 481 481 481 481 293 293 122 122 472 133 42 147 147 380 329 329 171 252 143 36 107 395 302 302 302 497 497 497 251 251 251 241 81 431 278 278 330 388 379 195 195 471 471 77 269 342 142 72 72 72 437 151 151 151 368 453 342 142 221 336 354 275 275 275 275 303 303 195 243 131 419 427 491 247 126 126 126 292 326 326 326 326 326 326 326 326 326 101 101 149 149 228 320 345 141 141 281 162 232 232 172 172 115 273 84 496 88 88 88 176 176 135 135 200 248 183 183 257 257 257 257 453 342 26 26 251 241 241 431 171 171 171 252 457 457 401 259 108 119 119 351 308 308 308 313 313 94 199 469 469 215 35 96 66 68 68 68 115 115 444 444 213 246 252 252 325 34 125 125 125 125 466 466 22 283 455 58 72 72 351 278 278 139 139 293 497 497 349 349 234 234 261 25 485 485 485 464 139 139 375 497 497 122 122 36 472 
221 336 354 420 420 324 464 464 180 106 426 426 426 426 413 348 64 212 212 198 22 283 455 8 354 354 106 284 306 306 306 306 306 396 396 396 37 303 303 48 404 78 229 491 491 15 15 193 193 193 17 +103-1240-0009 103 501 1 24 19 31 19 40 30 15 10 5 23 25 39 36 12 5 33 18 20 6 33 1 9 19 22 5 40 32 20 18 4 11 18 14 11 18 19 24 33 13 23 29 20 33 14 24 6 30 19 31 5 25 12 20 37 25 19 26 9 19 16 6 30 19 25 1 38 19 23 39 5 24 21 15 9 23 13 30 40 31 33 6 30 27 37 14 4 33 22 3 30 24 5 11 20 1 12 4 33 18 20 24 13 25 33 19 31 27 18 19 40 33 14 25 5 29 31 20 11 12 5 25 13 22 31 33 4 16 33 14 25 36 25 1 10 4 2 4 2 4 3 2 3 3 3 3 4 3 2 2 2 4 4 9 4 2 2 3 5 4 2 6 4 3 2 2 5 5 2 2 2 4 5 3 4 3 4 3 3 4 4 2 3 5 1 3 2 9 2 2 2 3 2 3 6 6 8 6 7 5 7 2 2 4 3 3 5 6 4 3 3 4 3 4 3 6 7 7 3 3 2 2 5 3 3 2 2 2 7 15 3 2 3 5 3 3 2 4 5 2 6 5 3 1 4 5 3 2 3 3 4 4 2 1 2 3 3 3 2 3 3 4 2 2 3 5 6 15 17 17 17 363 363 363 51 51 51 228 491 7 217 473 258 258 31 342 342 494 494 494 281 9 142 397 147 329 329 329 329 143 310 107 302 302 302 497 122 10 10 309 398 398 398 398 398 374 132 216 216 127 45 45 45 325 183 451 30 30 30 3 14 14 411 284 284 405 405 405 206 206 167 24 227 227 472 221 401 491 354 420 420 422 143 458 144 27 351 351 151 253 368 368 99 338 338 338 400 400 400 400 30 3 58 58 110 254 254 254 254 58 58 72 72 110 498 498 498 498 396 313 325 183 183 57 57 57 203 53 394 90 76 108 108 119 351 139 139 139 139 293 293 215 35 74 74 329 329 213 329 252 325 300 382 382 245 399 217 70 65 65 153 329 372 406 406 467 313 186 39 342 342 224 242 242 116 466 466 22 283 448 448 14 411 213 213 213 213 173 173 402 196 196 176 328 328 248 248 8 354 255 255 38 349 205 234 261 148 148 148 148 148 148 372 372 372 59 452 335 197 226 226 209 188 188 340 340 340 340 33 195 117 117 117 197 197 197 80 491 80 491 491 7 7 7 364 345 109 329 139 329 81 219 219 485 464 464 203 203 33 394 212 465 107 395 329 329 329 171 171 171 301 301 8 129 354 425 175 175 431 329 329 264 468 468 304 313 186 162 323 482 482 482 238 6 272 106 153 153 153 182 372 372 372 372 59 245 335 14 209 411 410 410 410 410 410 410 173 29 29 495 406 467 415 415 131 90 259 144 27 437 437 306 306 306 306 396 203 53 469 469 469 325 325 41 41 41 19 19 454 229 247 126 126 126 326 326 326 326 326 326 101 149 149 228 289 491 127 45 45 45 45 240 183 183 183 451 30 30 30 301 399 217 473 432 432 432 330 348 64 457 401 82 108 377 87 87 38 162 323 323 115 273 84 84 496 274 274 58 58 183 257 257 257 31 9 238 6 119 161 308 308 308 396 313 94 199 199 459 215 215 96 66 342 172 224 41 41 324 3 301 314 198 22 283 455 116 199 331 443 443 178 178 458 96 86 238 6 272 145 145 460 460 460 402 402 6 272 300 469 313 10 94 398 398 374 374 374 132 413 303 303 48 404 13 170 491 491 491 312 15 15 292 292 292 193 193 193 193 17 +103-1240-0010 103 740 1 29 20 33 14 18 4 11 4 31 22 33 19 24 5 37 22 6 30 31 16 14 24 4 34 39 36 22 5 34 9 14 33 18 4 11 25 13 37 14 9 19 25 27 25 33 36 37 3 23 5 25 33 19 30 19 25 16 14 24 15 32 5 25 5 9 7 33 13 25 20 34 19 26 19 25 18 19 40 18 27 23 8 16 1 4 25 11 39 13 33 1 18 20 30 38 5 40 24 4 34 39 36 22 5 34 9 14 33 1 4 33 18 4 16 29 4 31 33 34 30 20 6 25 12 20 4 16 33 14 25 36 25 5 37 5 9 19 40 20 11 15 1 29 23 4 31 19 11 23 20 11 30 8 37 19 26 27 37 14 12 5 18 3 23 27 5 25 11 5 29 12 5 18 19 23 1 17 3 4 3 2 3 2 2 7 4 2 1 3 2 2 4 3 4 5 5 4 3 3 5 3 2 3 4 3 5 3 5 1 5 1 3 5 2 3 3 4 2 7 5 3 4 2 4 3 3 3 2 5 2 4 2 2 4 1 4 4 4 2 1 2 2 5 5 4 3 4 4 3 6 4 1 3 2 3 3 3 8 8 11 37 5 1 2 3 8 10 11 9 3 4 3 2 3 3 6 4 1 3 7 3 5 3 6 3 8 6 2 8 5 6 5 5 5 2 5 3 7 3 3 3 3 4 4 3 1 3 6 2 2 3 2 6 3 4 4 4 12 15 4 3 
4 7 1 2 3 4 5 3 6 2 3 4 8 2 3 2 2 6 4 5 5 2 2 3 3 3 5 1 5 3 9 9 17 17 17 17 363 363 363 363 363 363 363 408 51 51 228 491 289 320 74 329 329 329 329 329 325 34 334 382 382 467 110 254 254 254 285 34 145 145 145 376 460 460 169 150 342 86 105 96 96 272 57 57 57 203 53 255 255 255 130 402 221 259 208 441 441 153 153 372 372 372 59 271 271 269 54 54 9 97 336 155 155 332 332 332 245 399 473 65 329 329 329 460 169 164 164 485 485 485 374 132 143 259 144 27 437 329 329 329 169 164 164 142 221 336 29 495 334 59 59 313 24 131 58 72 110 254 254 254 254 35 35 196 309 309 479 331 463 463 463 463 29 382 382 245 8 129 354 137 137 137 137 33 10 10 309 331 331 84 84 350 350 413 413 33 394 465 377 377 87 123 132 8 354 354 106 284 481 481 481 175 175 81 242 116 33 394 465 465 108 119 485 485 286 286 468 406 467 467 121 53 394 155 155 25 469 469 203 217 473 418 418 418 418 418 99 436 436 60 60 298 199 255 255 8 180 113 113 113 113 240 285 131 335 14 401 209 411 475 475 475 475 475 475 475 475 422 164 164 164 214 214 214 214 328 328 200 200 248 335 188 188 340 340 94 199 199 257 257 257 257 342 9 142 437 424 424 424 424 424 497 497 122 251 241 431 431 265 265 428 428 85 146 146 358 358 352 352 352 352 352 352 352 112 427 56 491 312 312 312 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 408 408 408 408 149 149 228 289 209 83 55 55 322 67 212 90 219 357 357 357 357 357 357 120 120 240 385 385 35 227 227 227 419 419 439 225 197 47 47 491 491 47 491 47 47 80 491 491 491 289 7 373 451 451 451 286 286 286 286 286 468 468 245 43 43 345 141 141 281 281 9 142 196 217 473 65 486 486 460 460 169 164 164 164 485 485 485 374 132 143 259 144 144 27 437 329 329 329 329 150 164 164 105 221 336 354 29 498 498 59 59 313 385 227 419 427 229 247 408 149 149 228 226 491 209 83 415 415 415 240 314 335 58 72 72 72 72 72 110 110 486 486 460 460 460 169 352 352 402 221 401 259 74 311 311 311 311 311 311 311 169 150 86 86 238 6 272 472 472 234 164 164 487 487 487 288 288 213 213 246 246 3 335 440 125 125 125 125 466 466 448 448 448 448 464 145 145 460 460 460 349 402 96 272 469 469 469 236 94 398 374 374 374 132 132 339 94 199 69 223 130 280 44 44 44 44 32 401 401 354 354 278 278 278 368 342 342 41 41 324 324 301 239 239 384 93 93 93 93 93 207 207 207 246 19 454 229 247 247 126 126 326 326 326 326 101 101 149 228 491 80 80 491 491 74 425 425 386 386 431 486 486 460 460 169 150 342 342 342 224 494 494 416 26 359 359 166 166 166 324 301 236 401 259 161 161 79 499 499 499 265 85 85 146 146 173 173 176 176 135 135 200 248 14 14 411 410 410 410 410 410 410 173 29 29 382 313 216 283 283 455 58 72 72 72 437 481 481 481 481 481 293 175 175 81 84 84 16 88 88 89 89 446 116 64 212 384 180 230 230 230 215 35 35 96 198 198 22 283 455 455 58 183 278 278 278 278 139 139 139 375 375 375 375 98 13 229 491 170 491 15 15 15 193 193 17 +103-1240-0011 103 732 1 4 25 11 18 19 40 9 13 31 33 31 36 33 5 37 22 23 27 12 40 1 38 19 10 38 5 40 29 23 15 25 29 30 36 16 12 5 33 18 20 38 5 40 17 27 19 26 7 33 5 37 4 37 5 25 23 20 1 4 25 11 18 20 18 4 11 12 5 9 5 17 20 4 25 11 12 5 31 6 30 5 23 24 13 30 38 19 10 9 19 33 27 22 5 25 11 12 5 33 18 20 38 5 40 17 27 19 26 5 22 5 25 31 19 11 14 5 9 5 23 11 19 31 33 5 25 31 1 25 7 1 38 13 30 38 5 40 24 4 34 39 36 22 5 34 9 14 33 1 17 27 19 26 1 4 25 11 38 8 38 5 40 18 20 17 27 19 25 12 13 30 1 5 6 1 2 2 2 4 4 4 5 3 5 4 2 1 4 5 3 7 2 7 13 4 2 4 2 2 4 7 3 6 6 4 3 6 5 2 2 2 4 2 3 2 3 3 4 3 4 8 3 2 3 5 3 2 4 2 11 33 3 2 1 2 4 4 3 4 1 3 7 5 2 7 1 2 1 2 2 6 4 2 3 2 5 7 5 3 2 4 2 3 6 4 3 3 1 2 1 2 
1 2 2 3 1 4 3 4 3 3 2 4 2 2 7 1 3 3 3 2 2 2 4 3 5 2 2 5 9 28 5 13 6 10 8 3 6 2 4 6 7 4 2 3 8 3 6 3 3 4 4 4 7 3 11 2 5 3 3 6 6 3 2 3 2 3 6 4 6 5 2 2 11 21 17 17 363 51 228 412 412 83 194 194 446 67 67 131 183 257 257 257 257 453 342 221 221 336 354 354 443 443 443 169 150 342 86 86 6 6 272 472 66 482 482 115 485 374 374 132 252 36 449 462 462 402 402 221 336 144 208 425 386 386 431 496 496 496 496 496 274 274 37 233 185 185 269 323 18 427 427 247 247 126 126 292 23 23 408 408 391 391 228 228 289 491 320 407 407 407 407 310 107 397 397 141 141 141 281 281 9 142 221 221 336 491 74 74 425 425 386 386 431 290 290 290 290 434 434 339 339 195 33 394 76 465 74 190 190 190 487 487 374 374 374 132 132 358 352 352 352 402 198 198 45 45 45 45 131 183 451 30 30 30 301 378 345 141 141 281 453 9 221 336 144 180 84 496 88 88 176 176 135 328 200 335 14 14 145 145 113 113 113 113 206 285 449 34 69 223 130 280 180 145 145 486 460 460 173 280 280 242 242 116 33 250 251 241 81 256 444 213 246 246 246 19 19 454 454 78 170 170 491 28 491 491 312 312 187 292 292 12 12 21 21 21 21 21 21 21 21 21 21 408 408 408 408 149 149 491 491 289 491 209 83 55 322 67 325 30 30 30 30 3 58 58 72 110 110 254 254 254 254 314 35 198 127 283 455 455 236 129 401 401 401 354 354 431 151 151 240 416 416 192 41 41 41 324 3 464 89 89 446 348 466 22 283 455 38 162 232 482 172 115 106 106 153 372 372 372 406 467 302 302 497 497 399 399 217 473 473 264 264 264 264 264 468 468 59 59 59 245 43 364 345 407 407 407 310 107 447 221 336 354 420 420 236 129 36 108 119 119 351 496 496 496 274 143 458 192 242 242 116 116 466 212 45 45 45 325 183 30 30 301 378 141 141 141 281 342 9 221 336 144 180 84 88 88 88 176 135 135 200 200 464 44 44 143 458 27 27 121 121 33 478 478 232 68 172 115 273 278 278 278 285 495 495 495 134 134 134 134 8 100 100 100 497 122 401 401 401 371 278 278 278 31 39 86 86 6 272 11 11 11 11 379 379 471 471 270 433 433 433 18 112 56 56 491 312 312 312 187 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 408 408 408 408 149 228 228 289 491 7 309 479 331 315 315 315 315 450 450 16 293 293 335 197 197 197 197 197 197 197 197 491 491 7 7 364 364 364 364 276 181 181 181 181 181 264 264 264 264 468 468 468 245 245 43 364 364 430 430 430 430 430 342 342 221 196 217 217 217 217 473 486 486 486 486 460 169 169 164 164 164 485 219 485 485 132 143 143 129 82 144 27 27 437 329 329 151 169 164 164 164 164 221 401 259 29 29 382 313 313 35 131 472 221 401 401 401 401 491 144 180 180 84 84 350 88 88 88 176 176 176 176 328 328 200 200 200 117 454 404 483 226 226 226 226 491 83 55 55 55 322 67 67 212 131 133 364 364 276 181 346 346 181 265 85 85 146 378 378 345 430 430 430 430 342 342 451 30 30 324 422 143 401 401 144 27 180 84 84 496 88 88 88 176 176 135 135 200 248 248 248 216 127 114 114 264 264 264 264 264 59 59 452 452 263 263 417 417 237 491 237 237 421 421 421 491 491 491 128 128 128 491 128 128 193 193 193 17 +103-1240-0012 103 759 1 18 4 11 19 33 9 19 25 13 25 20 5 12 14 24 4 25 19 25 4 37 5 25 23 20 1 24 19 31 19 40 30 15 10 5 23 1 11 13 16 33 23 20 29 35 33 19 26 12 19 31 4 25 11 12 4 33 5 17 13 12 14 1 24 8 33 18 4 37 17 19 37 5 25 5 29 30 19 33 20 17 35 11 17 13 31 4 40 33 5 9 27 34 22 38 13 31 10 5 25 40 1 9 5 33 24 4 34 39 36 31 27 30 13 30 23 20 38 13 25 33 16 14 24 18 27 24 1 12 5 33 19 33 24 5 31 33 9 20 31 5 24 34 19 26 29 30 13 31 19 26 4 25 11 5 25 39 36 41 38 5 23 38 19 10 38 5 40 33 15 22 19 26 18 19 24 1 24 9 3 2 3 3 3 4 3 8 3 5 4 3 3 5 6 2 3 3 5 3 2 5 2 11 18 4 3 5 2 5 4 4 3 4 8 3 2 5 4 3 2 3 4 3 3 3 7 2 4 7 5 2 2 3 5 
6 2 4 3 4 7 13 5 5 2 3 2 3 3 3 2 4 2 3 5 3 1 2 4 4 3 5 3 8 6 2 4 3 2 3 5 3 3 2 3 3 5 3 6 6 40 2 2 3 5 7 5 3 3 7 9 5 4 4 3 4 5 2 4 2 4 2 3 7 9 5 8 3 2 3 2 2 5 3 4 5 2 3 7 2 4 3 2 5 8 2 2 7 3 5 1 2 2 4 3 5 5 5 1 2 3 3 2 4 2 2 4 5 4 3 2 3 3 4 5 7 17 17 17 296 296 317 317 491 491 317 305 305 461 491 461 491 491 461 491 491 435 435 435 435 435 435 7 373 72 72 430 430 430 430 430 430 430 34 177 177 177 236 35 401 259 354 137 137 137 137 137 94 199 335 14 14 411 411 475 475 475 475 475 475 475 475 324 324 464 464 493 493 493 493 493 216 300 300 382 245 399 217 217 473 136 136 136 136 136 136 282 94 199 340 340 340 94 199 145 145 486 486 460 460 173 280 29 242 242 116 379 33 250 251 241 81 444 444 213 246 246 246 19 19 454 229 247 247 126 126 292 326 326 326 326 326 326 326 326 326 101 101 149 149 228 289 7 217 473 258 258 258 258 342 342 342 494 494 494 368 453 9 142 397 147 380 329 329 329 329 329 329 36 310 107 395 302 302 302 375 497 98 98 98 225 225 225 225 80 80 259 384 371 180 443 443 169 169 352 352 402 6 6 26 359 166 166 166 301 129 259 259 74 189 189 189 285 449 449 176 176 135 328 200 200 248 248 32 32 127 114 114 258 258 258 31 39 86 68 68 68 483 483 440 89 194 446 446 33 212 212 198 127 114 92 92 92 92 167 167 457 457 36 108 377 123 123 416 458 445 180 180 443 493 493 216 300 300 334 59 59 452 263 229 247 247 126 126 326 326 326 326 101 101 149 149 228 228 491 7 70 70 65 65 428 428 428 146 438 325 449 34 202 202 202 202 402 221 259 144 445 278 278 173 173 280 29 242 242 116 94 199 44 44 44 129 129 259 74 190 190 104 104 104 325 325 41 324 324 301 416 239 144 144 484 484 484 236 314 131 221 401 259 445 445 180 443 443 443 443 120 120 271 271 39 342 342 224 253 253 253 253 31 86 238 6 272 123 123 123 8 354 106 496 496 496 274 368 342 142 221 336 208 208 441 151 151 151 169 150 99 238 6 6 310 107 60 298 298 298 275 303 303 471 471 471 269 433 18 112 427 491 491 312 312 312 292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 408 408 391 163 491 316 491 316 316 316 316 491 316 316 316 73 289 289 320 159 159 159 159 35 35 196 196 217 473 473 329 329 329 329 329 329 169 169 164 164 164 485 485 485 485 374 132 422 186 162 232 482 172 115 344 344 344 344 344 274 274 274 42 42 364 147 147 380 288 264 264 264 468 468 468 313 134 359 359 166 166 166 301 301 43 364 276 109 109 189 330 330 33 64 76 131 472 393 155 155 165 165 165 165 53 58 58 72 72 72 72 437 350 350 350 350 350 350 350 182 413 413 381 381 404 404 225 225 225 225 225 80 80 491 491 320 127 45 45 45 45 325 177 177 177 177 457 217 217 217 70 65 65 319 169 150 150 86 86 6 272 472 472 336 354 420 420 420 422 162 232 232 68 68 115 273 231 231 231 53 53 76 465 198 214 214 214 328 200 200 248 76 129 401 491 74 190 190 190 488 488 488 151 151 169 150 342 342 68 224 176 176 176 328 328 200 200 464 89 89 446 67 212 34 106 319 319 319 348 33 33 219 219 219 219 485 374 374 374 374 374 368 368 107 161 134 134 100 100 100 497 43 43 345 407 407 407 36 310 447 397 397 141 141 141 281 86 86 238 6 119 119 295 295 295 295 295 252 143 192 192 135 135 328 200 200 183 183 57 57 57 57 57 203 381 48 48 13 13 78 491 128 491 193 17 +103-1240-0013 103 762 1 18 20 38 5 40 12 5 32 8 5 31 33 24 4 25 5 23 8 37 5 25 11 18 15 33 19 11 33 5 18 4 37 33 19 17 27 5 24 5 26 31 33 30 15 25 21 14 40 14 33 36 13 25 20 29 23 15 31 38 13 30 18 20 24 8 33 18 4 37 33 19 33 6 22 1 24 4 34 39 36 11 30 13 31 33 5 29 38 19 12 5 38 8 33 22 3 23 14 5 25 11 30 8 37 19 26 19 25 5 9 5 17 20 1 38 5 40 31 5 24 34 19 26 12 5 33 11 19 11 25 33 18 4 29 5 25 6 16 5 25 1 24 19 31 19 
40 30 15 10 5 23 1 29 3 25 11 14 13 40 32 20 24 8 33 1 22 35 11 24 15 22 25 5 34 19 26 5 37 19 33 1 8 5 3 3 2 3 2 2 7 9 3 3 3 3 5 2 3 5 8 3 1 2 1 5 5 3 2 2 3 2 3 4 4 3 2 4 4 3 3 2 5 3 3 3 4 3 3 4 4 4 3 6 3 2 4 5 3 5 5 2 2 1 6 2 5 5 2 5 2 3 3 3 4 6 10 24 5 6 5 3 6 5 4 3 6 3 5 3 3 1 2 2 5 5 3 6 5 4 5 3 1 4 3 6 2 2 3 1 2 3 4 5 3 8 13 3 1 3 7 2 5 3 2 5 1 4 3 3 3 2 4 2 6 4 4 2 3 4 6 2 5 33 5 3 3 3 4 4 4 3 5 6 13 5 5 3 2 4 3 2 5 3 4 8 6 11 3 3 2 4 4 5 7 5 4 2 4 7 3 5 5 8 17 17 17 296 296 317 184 184 491 373 451 451 451 30 301 378 364 345 141 141 141 281 342 342 198 22 283 455 38 338 338 338 395 395 106 480 480 480 85 85 146 146 464 459 459 459 31 31 86 238 6 472 196 196 473 136 136 136 136 136 282 388 199 199 255 255 251 251 241 241 431 265 265 265 85 85 85 146 299 173 352 89 89 322 67 199 58 72 72 72 110 171 171 171 171 252 143 36 449 191 191 236 314 36 108 377 87 58 72 110 110 202 202 202 460 169 352 402 402 6 272 87 87 87 416 144 180 84 496 88 88 88 255 255 399 70 70 65 319 319 319 348 200 248 478 66 482 482 238 6 161 79 288 290 290 290 290 434 339 339 212 310 395 334 334 304 304 304 49 269 168 168 157 157 313 313 36 377 377 123 123 88 88 14 411 475 475 475 475 475 475 475 324 301 129 259 74 425 425 386 386 343 343 343 343 358 318 39 342 9 142 397 345 109 109 498 245 313 183 451 451 30 30 30 301 399 217 70 65 65 428 428 428 146 146 325 131 72 72 110 110 486 486 460 460 402 402 96 272 87 87 87 236 259 108 119 119 351 405 405 405 405 405 405 206 206 169 233 458 192 419 427 491 491 247 312 126 292 292 292 292 292 292 21 21 21 21 21 21 21 408 408 408 149 149 228 82 320 7 217 473 65 486 486 486 460 329 169 164 164 164 164 485 485 485 485 374 374 132 132 132 236 32 401 259 161 161 79 79 380 288 443 151 169 150 150 86 86 86 238 6 272 180 230 230 230 230 215 215 35 29 345 333 333 220 220 44 44 44 43 364 276 346 346 346 428 428 146 146 385 131 472 221 458 144 27 437 437 481 481 481 481 481 175 175 81 300 300 382 406 467 467 89 89 446 116 394 212 161 161 79 499 499 499 428 85 85 146 173 173 280 176 135 135 200 464 340 340 199 44 44 44 8 32 259 354 431 151 151 151 416 416 192 41 41 41 41 41 19 454 229 247 247 126 126 326 326 326 101 101 149 149 228 289 320 320 345 141 141 281 31 342 232 232 68 68 172 115 231 231 231 231 231 53 394 76 164 164 214 214 214 214 200 200 248 212 127 45 45 45 45 236 401 401 259 384 371 278 278 278 314 196 242 242 33 64 212 131 472 72 72 72 110 486 486 460 460 460 215 35 29 29 242 242 94 199 199 106 426 426 426 169 349 352 352 352 242 275 275 303 303 303 48 48 417 417 417 237 237 491 28 28 491 305 305 491 491 362 305 491 491 491 491 362 366 491 366 366 316 491 491 435 316 435 491 491 73 289 7 7 217 473 258 258 258 342 342 224 494 494 494 281 9 142 397 147 147 329 329 329 329 329 252 36 310 107 395 302 302 302 302 497 98 98 13 229 82 247 312 126 126 326 326 101 101 101 149 391 228 491 289 491 74 437 437 284 405 426 426 206 348 64 64 212 300 382 495 406 467 253 253 253 99 99 338 338 400 400 400 400 30 301 399 217 70 65 65 265 428 428 85 146 146 358 385 36 227 427 427 229 247 126 126 126 326 408 408 391 228 228 289 491 144 27 389 389 389 314 196 196 217 473 476 476 476 476 476 143 458 96 196 32 196 309 309 309 309 479 331 231 231 231 231 231 349 164 214 214 214 214 328 200 200 335 14 411 287 284 223 223 223 223 130 280 277 277 277 277 277 385 24 227 419 439 439 439 439 225 128 193 193 17 +103-1240-0014 103 697 1 4 25 11 18 14 4 16 33 14 25 36 25 40 19 25 21 28 24 5 25 33 38 5 40 31 29 28 23 11 1 8 23 21 19 31 33 31 33 13 29 27 37 14 33 19 17 30 20 25 17 15 9 5 23 40 4 16 33 14 33 
[Added data hunk condensed for readability: this part of the diff adds roughly nineteen very long data lines, each a LibriSpeech-style record of the form `<utterance-id> <id> <count> <long sequence of small integer IDs>` (for example `+103-1240-0015 103 190 1 19 16 18 20 11 30 …`). The visible records run from 103-1240-0015 through 103-1240-0033, with partial records at the hunk boundaries; the integer sequences appear to be discrete phone/unit or duration IDs used as pre-training data. The original file path and hunk header fall outside this excerpt and are not reproduced here.]
110 254 254 254 254 36 478 232 232 68 172 115 273 470 120 120 120 120 24 314 314 472 198 127 45 45 45 45 457 196 217 217 473 65 329 486 486 460 460 169 164 164 485 485 485 485 485 374 132 58 254 254 254 254 314 416 458 144 180 106 426 426 426 413 348 64 76 465 108 377 123 44 236 129 259 190 190 380 499 499 428 85 146 146 143 131 472 133 133 147 147 380 288 288 173 173 280 29 334 59 59 59 313 143 36 377 377 87 87 217 473 65 213 213 213 252 325 34 44 44 44 129 259 445 445 445 351 351 351 486 365 365 360 200 200 200 248 212 192 180 255 495 42 147 380 380 374 374 132 132 132 349 155 155 165 165 165 165 70 65 284 405 206 169 150 162 482 482 482 482 482 238 6 161 487 487 288 288 288 139 139 139 81 337 324 423 423 423 423 423 423 452 263 229 247 312 126 126 292 326 326 326 326 326 326 326 408 408 149 149 228 491 320 7 217 258 258 258 31 54 224 494 494 368 453 142 142 397 147 380 329 329 329 329 252 36 107 107 395 302 302 302 375 375 98 143 401 259 144 389 389 389 389 314 196 309 479 307 307 307 61 61 285 449 202 202 202 130 129 259 354 137 137 137 116 33 250 217 217 70 70 138 138 138 138 138 372 467 467 255 255 38 54 86 238 6 272 106 499 405 426 206 348 199 199 459 469 271 99 447 447 6 6 6 419 439 78 491 305 421 491 421 128 491 193 193 193 17 +103-1240-0034 103 770 1 32 20 38 5 40 4 22 32 5 23 20 31 33 30 19 22 5 25 1 11 5 24 16 14 16 8 37 31 13 22 5 25 40 1 19 33 38 5 40 1 2 12 5 33 1 24 3 30 19 23 5 38 6 40 1 24 15 22 19 26 1 16 5 25 5 37 18 14 1 9 5 33 24 19 31 19 40 30 15 10 5 23 38 5 40 6 23 24 27 31 33 16 6 30 31 33 5 31 5 29 27 40 19 33 1 3 30 39 36 19 25 1 14 25 19 31 33 24 3 30 19 23 5 1 32 20 11 19 24 4 25 11 19 11 1 38 19 25 37 28 31 30 20 33 14 25 11 33 5 18 14 1 39 13 31 5 37 22 6 30 31 1 12 6 2 3 1 3 6 3 2 4 3 5 5 3 2 1 3 3 8 1 4 4 9 4 3 6 10 3 5 2 4 3 5 6 32 4 3 3 3 5 1 14 2 2 5 6 1 2 2 3 2 3 4 4 31 1 2 3 4 2 2 2 7 3 2 2 2 3 8 28 2 4 3 3 2 5 2 5 3 4 4 4 3 3 1 7 2 3 4 2 4 2 5 3 3 5 7 3 4 2 4 6 4 3 10 57 5 4 2 5 2 4 4 8 2 4 5 4 2 2 2 3 4 9 3 7 2 2 2 4 4 2 2 2 1 2 2 1 4 4 6 6 2 2 6 5 3 1 2 2 4 8 24 4 2 4 1 2 5 3 6 12 10 17 17 17 17 296 317 317 317 435 435 184 184 184 373 373 338 400 400 400 30 378 345 141 141 281 453 168 145 145 145 460 460 460 178 96 96 436 447 134 134 134 134 134 359 166 166 166 166 324 186 162 482 482 482 238 6 336 161 487 278 278 178 458 192 192 242 116 116 195 195 394 394 212 401 401 401 401 384 371 180 180 319 319 319 203 53 381 381 381 381 381 381 76 393 155 332 332 332 332 245 349 205 205 261 25 106 265 265 265 265 85 85 85 146 146 173 402 402 66 68 68 115 273 470 151 151 178 458 458 192 242 275 275 379 303 471 471 471 49 433 160 112 427 247 247 312 126 292 292 292 292 292 292 292 292 292 21 21 21 326 21 21 408 408 408 408 149 149 228 228 316 316 73 491 289 289 209 177 177 177 177 131 133 133 141 141 141 141 281 453 342 483 483 226 226 209 287 319 319 319 319 348 348 394 478 478 66 68 68 115 494 494 494 215 129 259 74 74 437 72 72 437 437 496 496 496 496 274 274 368 368 368 9 168 494 134 134 8 100 100 100 100 100 375 375 497 216 198 45 45 45 45 35 196 196 70 65 329 329 329 406 406 467 288 139 175 175 423 423 423 345 141 141 281 342 142 196 217 473 476 476 476 143 458 192 176 135 135 328 200 248 248 393 234 234 234 261 25 319 319 319 348 94 199 223 223 130 402 58 156 156 156 156 59 59 59 452 263 229 247 247 126 126 126 292 326 326 326 326 1 1 1 1 408 408 260 260 391 391 391 391 491 73 73 73 289 491 320 159 159 159 159 159 385 35 196 196 217 473 258 258 258 342 342 224 494 494 494 368 9 142 397 147 147 329 329 329 329 329 329 143 310 107 107 395 302 302 302 497 
497 43 345 141 141 141 281 453 168 483 14 226 209 297 297 297 297 297 399 70 65 65 496 169 150 54 238 6 6 472 393 234 261 261 148 148 148 387 372 396 186 186 54 86 238 6 6 472 472 472 482 224 224 494 494 38 162 323 323 224 494 494 129 259 74 437 496 496 496 496 274 274 368 368 9 168 277 277 277 37 24 131 227 419 439 439 439 439 417 237 237 237 28 28 491 491 28 362 491 362 362 362 491 491 491 491 362 362 491 218 362 491 491 218 491 218 218 218 218 218 435 218 366 491 491 305 366 491 366 435 366 491 366 491 366 316 316 491 316 491 316 316 491 73 73 289 289 209 287 430 430 430 430 430 430 219 219 477 477 378 88 109 44 116 116 199 335 14 226 226 226 209 209 411 498 498 498 308 396 313 94 459 459 459 459 271 31 342 86 86 6 272 472 221 196 70 473 329 329 329 406 467 134 134 134 175 175 423 423 423 423 423 263 263 225 225 225 225 225 80 373 373 338 338 400 400 400 30 422 239 384 490 490 399 217 473 365 365 365 365 365 388 64 212 191 191 191 314 133 259 409 409 409 409 33 33 250 32 280 280 153 153 343 387 387 146 358 39 39 86 142 142 397 456 456 456 236 36 108 119 308 308 308 308 308 308 308 388 339 33 394 212 108 123 123 123 123 58 156 156 156 156 59 59 452 263 229 229 247 312 312 126 292 292 292 292 292 1 1 1 1 23 23 23 408 408 408 391 391 316 73 491 289 289 7 357 357 357 271 31 342 168 494 255 402 402 458 208 441 441 153 153 153 387 372 396 396 271 186 39 39 390 390 390 390 390 390 18 18 112 439 439 439 439 237 78 421 128 193 193 17 +103-1240-0035 103 802 1 31 13 11 24 3 30 19 23 5 1 4 40 19 16 17 13 33 19 26 9 28 40 16 14 24 6 30 16 5 25 5 31 8 23 5 24 40 19 25 27 37 5 31 22 27 32 5 38 14 29 3 30 33 5 37 12 20 39 36 41 36 5 23 31 29 30 19 26 38 14 22 6 25 13 25 20 38 13 23 30 13 17 39 5 23 15 33 19 11 4 37 5 25 23 20 16 3 30 24 1 19 25 31 33 13 11 5 37 9 20 19 26 4 25 5 25 18 14 11 5 37 19 25 5 37 15 32 5 25 1 24 19 31 19 40 30 15 10 5 23 16 13 23 33 12 5 33 32 20 18 4 11 30 20 31 20 37 11 5 1 31 5 37 19 30 24 13 25 33 5 23 1 21 27 23 33 1 32 20 34 6 33 19 25 13 22 31 22 23 5 24 15 32 5 25 29 28 25 33 31 1 8 4 2 3 2 2 4 3 3 6 13 5 4 2 4 3 3 2 2 6 3 11 3 4 1 3 5 2 4 2 2 2 5 5 2 2 4 2 2 7 3 2 3 3 4 4 4 4 3 3 4 4 2 2 2 2 2 2 5 6 4 2 2 3 4 4 1 3 8 5 5 4 3 4 4 3 4 5 5 7 3 2 4 2 3 2 3 2 2 3 7 2 2 3 2 5 6 6 3 4 19 5 2 4 3 2 2 2 2 3 3 2 3 2 2 5 4 6 5 3 5 3 2 2 2 3 4 7 2 7 33 4 3 2 3 3 4 3 4 3 2 5 3 4 2 2 1 3 4 4 4 3 6 1 3 6 7 2 3 6 9 8 2 4 6 4 8 2 4 3 3 5 3 5 3 7 8 18 7 2 4 6 4 2 3 2 4 3 2 2 1 3 2 6 3 3 4 7 2 2 5 10 17 17 17 296 296 296 184 184 373 66 172 179 179 179 179 314 196 196 70 65 329 329 495 406 467 288 134 139 139 175 423 423 423 423 263 229 82 247 126 126 326 326 326 326 101 101 101 149 149 228 412 83 253 253 253 453 342 224 118 118 118 118 402 402 221 259 144 445 180 443 240 449 449 176 135 135 200 200 248 248 32 32 354 354 153 153 153 153 387 387 387 85 207 318 185 269 9 142 393 155 165 165 165 165 70 14 14 411 153 387 372 372 349 349 205 352 29 242 116 94 199 255 38 31 342 68 115 273 265 265 85 85 85 175 175 81 203 203 471 471 49 453 168 89 340 116 116 10 10 479 331 84 84 496 274 8 29 459 313 31 162 54 105 105 336 27 496 496 496 496 274 99 99 436 395 423 423 423 43 43 345 347 347 245 245 129 259 74 437 437 306 306 306 206 240 285 449 69 223 130 198 198 283 455 219 219 219 219 219 485 374 374 374 132 132 99 99 161 161 397 134 100 100 100 497 497 186 162 482 142 105 336 336 336 190 380 288 288 360 328 200 200 195 195 248 248 364 364 276 276 109 498 498 498 396 396 178 35 458 192 125 125 125 125 348 199 335 14 411 411 475 475 475 94 475 475 324 324 301 378 43 364 276 109 109 443 443 139 139 
139 293 293 497 497 42 42 147 147 380 288 443 443 416 416 458 445 485 134 134 175 175 158 158 158 158 325 449 191 191 191 325 335 14 145 145 486 460 460 173 280 280 242 242 116 379 250 359 81 41 324 324 324 422 349 234 234 261 261 25 106 306 306 306 306 306 306 282 203 203 117 404 229 247 247 126 126 326 326 326 326 326 326 326 326 326 101 408 408 149 228 491 491 412 188 340 340 67 77 478 232 86 68 272 470 470 443 443 240 34 223 223 130 129 259 354 420 420 420 360 135 135 135 200 44 44 44 44 199 335 145 319 319 319 348 348 33 90 72 72 72 72 498 498 498 498 498 396 396 285 285 180 106 284 353 206 206 173 280 280 121 121 116 199 469 469 173 280 418 418 418 418 418 418 99 99 436 436 60 60 298 298 303 303 303 117 404 13 229 491 247 312 126 292 292 292 292 292 292 12 12 12 21 260 305 201 201 201 201 201 201 201 201 201 491 491 316 316 491 316 491 289 289 7 7 473 258 258 258 342 224 494 494 494 281 9 142 397 147 329 329 329 329 329 143 36 107 107 395 302 302 497 497 349 349 234 261 25 180 189 139 139 139 293 167 35 35 198 45 45 45 45 310 338 400 400 400 400 30 30 3 58 110 110 254 254 254 254 254 314 131 133 364 147 456 456 456 38 162 68 68 172 115 444 444 444 444 444 246 246 318 173 402 6 272 34 44 44 44 8 8 401 401 401 197 491 80 491 80 491 80 80 197 66 66 68 172 115 273 494 278 173 8 4 280 485 485 286 286 286 286 468 382 245 245 399 399 217 217 217 217 473 65 432 432 330 348 64 64 465 449 449 302 302 302 497 497 122 129 401 401 401 401 491 310 107 395 395 106 481 424 424 182 182 375 375 122 233 75 227 227 419 419 439 439 439 439 439 237 439 78 78 47 491 47 491 491 316 316 491 491 316 316 73 373 373 373 338 400 400 400 30 422 422 164 164 25 106 106 405 405 405 206 167 449 449 34 340 340 116 94 199 145 154 178 458 96 342 342 224 105 27 386 386 386 386 399 473 418 418 418 418 99 436 436 60 60 298 116 33 250 53 394 76 259 74 441 441 153 387 387 299 299 299 358 243 270 270 433 160 112 427 491 247 247 126 15 15 193 193 193 193 17 +103-1240-0036 103 625 1 24 3 30 19 23 5 4 25 11 24 4 34 39 36 22 5 34 9 14 33 5 37 6 23 29 20 29 5 23 5 11 3 29 33 19 26 5 9 28 1 16 30 5 24 5 25 6 30 16 5 25 5 31 8 23 5 24 1 38 13 23 1 12 5 38 14 23 11 38 5 40 31 14 33 5 25 23 20 33 14 25 19 26 5 29 31 8 11 7 25 1 32 20 38 35 11 9 20 31 5 29 30 8 40 11 4 33 25 5 34 19 26 4 16 33 14 12 19 31 1 25 5 34 19 26 1 17 5 2 7 3 3 4 4 3 3 3 5 4 1 4 6 3 4 3 2 2 2 2 5 4 7 3 3 1 4 2 3 4 2 2 1 3 2 10 21 24 5 1 2 1 2 1 5 3 4 2 1 3 6 6 4 3 8 37 6 3 12 4 2 3 8 6 4 3 2 2 2 9 4 2 1 2 2 3 6 4 1 2 4 2 3 5 4 7 9 7 30 6 2 3 2 1 3 3 3 2 3 4 4 3 3 2 2 6 3 5 2 4 4 2 3 2 3 5 11 27 9 3 7 2 5 25 17 17 17 17 296 296 52 52 52 51 51 51 51 51 184 184 491 320 7 217 70 473 65 65 329 42 42 147 147 147 380 288 134 139 175 175 423 423 423 423 423 335 440 89 89 446 446 212 131 472 196 196 473 65 486 486 460 460 169 169 164 164 485 485 485 132 143 129 401 144 144 27 27 437 151 151 169 164 164 164 401 401 259 29 382 313 285 34 69 223 130 280 106 297 297 297 297 297 293 215 35 35 259 74 74 213 213 213 213 213 252 215 259 29 100 302 175 175 81 255 255 236 384 180 405 405 405 206 215 96 449 135 135 135 200 44 44 44 8 32 32 401 401 401 401 401 354 354 153 153 153 153 153 153 153 387 387 387 207 207 207 207 19 19 454 454 454 229 229 247 312 312 312 126 292 292 292 292 292 12 12 12 12 12 12 260 260 260 260 391 391 391 228 491 373 373 155 165 165 165 165 53 44 44 199 106 106 284 387 372 372 396 349 349 234 261 29 242 116 94 199 255 38 162 232 232 172 115 273 265 265 265 85 146 146 146 175 175 81 459 203 203 203 381 48 404 13 439 78 170 170 170 28 491 187 187 341 2 2 2 
491 2 2 362 362 362 362 40 491 366 366 491 366 435 366 366 491 366 491 316 316 316 491 491 435 435 491 289 7 7 364 276 109 109 139 139 139 139 293 293 375 98 98 98 225 225 225 225 225 225 225 465 198 127 5 5 455 43 43 364 276 276 109 109 498 498 498 498 134 134 139 302 293 497 122 122 131 133 345 141 141 141 281 162 232 232 232 232 68 68 115 273 498 498 498 313 240 35 26 26 359 359 166 166 166 422 143 36 108 108 119 308 308 308 308 308 313 94 176 135 200 200 200 230 230 230 215 35 478 232 68 68 273 273 265 428 146 146 416 416 401 401 259 371 180 315 315 315 315 315 315 450 450 450 413 413 303 117 48 404 229 229 491 312 312 126 126 292 292 292 292 292 292 292 292 292 1 21 21 21 21 408 408 408 408 408 391 391 391 228 491 373 373 338 400 400 400 400 378 378 345 389 389 389 314 8 354 420 420 420 422 342 342 224 494 494 129 129 74 190 487 499 499 265 85 85 146 368 453 238 6 272 34 494 236 314 196 196 309 309 309 479 331 231 231 231 231 349 164 164 214 214 214 328 200 200 464 145 460 460 460 169 402 96 272 300 382 313 216 216 114 258 258 258 271 271 39 433 433 433 390 160 18 112 427 56 56 170 312 312 312 187 187 292 12 12 12 12 12 12 12 12 12 408 163 163 163 491 316 491 316 491 491 289 289 289 7 7 309 309 309 479 331 231 231 231 231 169 164 164 164 214 214 214 214 328 328 200 303 117 404 404 439 439 439 439 439 237 237 237 421 421 421 421 491 421 128 491 128 128 128 128 193 193 17 +103-1240-0037 103 740 1 18 38 5 33 3 25 1 14 34 29 35 33 31 5 10 5 25 27 32 5 25 19 25 33 5 39 6 30 18 13 11 1 32 20 11 19 24 4 25 11 19 11 19 31 5 29 30 36 37 19 26 23 20 1 12 19 31 18 4 11 9 19 25 11 5 25 38 19 12 7 33 18 14 5 11 37 8 31 9 20 19 26 4 31 22 33 1 4 25 11 24 5 31 33 29 14 16 6 30 31 9 20 11 19 31 5 29 30 36 37 11 1 38 13 23 38 20 37 9 19 25 34 19 26 22 19 26 5 9 7 33 19 33 16 14 31 5 24 33 8 24 1 6 23 38 19 25 33 14 19 25 16 4 22 33 1 30 19 33 14 25 11 24 3 30 19 23 5 1 25 2 2 1 3 4 4 3 9 4 2 3 2 2 3 3 2 6 3 5 2 2 1 2 3 2 2 3 3 6 3 9 3 7 2 3 2 4 6 2 2 2 5 2 4 2 4 3 2 2 3 4 2 6 39 2 4 5 5 1 3 2 3 4 4 4 5 3 2 2 6 3 4 3 2 2 4 6 5 2 4 2 4 6 5 2 6 10 9 3 4 3 4 3 3 5 4 5 3 5 5 3 3 3 2 3 2 3 2 5 5 5 66 3 10 4 3 3 3 3 3 3 4 1 3 3 2 3 2 3 5 3 2 3 2 2 5 3 4 5 8 9 11 5 5 3 2 2 4 2 2 4 4 6 4 5 5 3 2 5 5 2 2 2 2 2 4 3 6 11 17 17 17 296 211 317 317 317 317 52 52 52 52 52 52 52 52 51 51 51 51 184 184 184 491 320 320 181 181 181 285 449 34 125 125 125 348 348 457 14 226 226 226 226 209 411 498 498 498 498 498 169 169 164 164 472 221 259 354 181 181 236 35 478 54 224 344 344 344 36 449 44 44 44 10 10 10 479 331 84 496 496 274 99 99 436 60 60 298 116 94 199 340 340 116 76 377 123 123 123 219 477 222 222 222 372 372 245 58 72 72 110 110 120 120 120 120 37 24 24 131 404 439 225 225 225 225 80 80 80 373 373 338 400 400 400 400 422 143 384 490 490 490 399 217 473 365 365 365 365 365 365 330 388 212 384 191 191 191 314 401 401 75 384 490 490 31 342 342 224 494 494 129 259 74 190 487 487 374 374 374 173 173 176 176 135 200 200 248 359 359 474 474 474 474 19 454 229 229 491 247 312 312 126 292 292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 260 408 408 408 408 391 391 491 316 73 73 73 73 491 127 127 258 258 258 258 31 342 342 342 97 72 72 110 254 254 254 254 314 35 259 137 137 137 137 137 33 33 394 32 465 384 180 180 319 319 319 348 195 195 250 250 364 333 333 220 220 216 114 180 113 113 113 113 167 167 131 449 183 156 156 156 156 406 467 255 255 236 314 90 4 280 280 265 265 265 85 146 146 186 39 342 142 221 336 354 420 420 360 360 135 135 200 200 464 145 376 376 376 376 460 169 150 150 342 86 105 6 96 96 227 419 
419 439 78 47 47 47 47 491 491 80 80 289 289 209 83 194 194 194 194 194 194 282 388 195 195 131 472 472 196 217 70 65 65 319 169 150 150 86 238 6 472 221 336 74 190 492 492 492 492 245 349 205 205 261 25 148 148 148 372 372 396 396 271 186 39 86 86 142 221 336 354 420 420 420 422 143 36 371 490 490 31 9 142 221 336 336 74 190 487 487 288 374 374 374 132 132 132 173 173 96 6 227 419 439 439 78 170 305 170 28 28 491 28 491 28 28 28 491 362 362 362 362 362 362 491 362 362 362 491 362 362 305 305 218 218 491 218 218 218 491 218 218 218 218 435 435 211 211 369 21 21 21 21 21 21 21 21 23 23 260 260 260 260 391 391 391 491 73 73 289 289 320 320 109 109 84 84 139 139 139 139 139 16 16 293 293 293 43 364 345 152 152 152 152 422 402 221 221 354 137 137 137 137 33 394 76 465 164 214 214 214 360 360 76 458 192 176 176 135 200 200 464 255 255 8 354 180 113 113 113 113 206 240 285 34 277 277 457 173 155 155 332 332 38 162 232 68 115 273 231 231 231 231 53 53 90 76 108 119 103 103 103 103 103 103 103 85 299 299 203 381 381 117 404 263 439 439 417 417 237 237 47 491 80 80 80 80 435 435 209 287 297 297 297 297 293 293 293 43 364 109 109 278 278 348 64 465 449 300 382 495 467 467 242 116 33 394 393 205 261 25 470 376 376 376 376 460 178 178 233 96 96 227 419 419 439 225 80 80 80 80 80 320 456 456 456 456 236 108 119 308 308 308 308 308 179 313 64 212 472 196 217 473 65 329 495 406 467 134 134 139 175 175 423 423 423 423 263 13 229 229 491 312 15 15 15 193 193 193 193 17 +103-1240-0038 103 357 1 24 19 31 19 40 4 23 19 17 40 4 25 11 14 31 29 13 25 31 14 38 5 40 5 29 18 20 30 38 5 25 11 15 9 19 16 6 30 22 30 19 31 24 5 31 5 25 11 32 20 31 13 11 32 20 38 5 40 17 27 19 26 33 19 17 13 33 15 23 19 33 5 23 17 14 23 16 14 24 12 20 5 31 8 23 5 24 27 37 14 19 25 1 2 19 25 12 5 31 29 30 19 26 1 14 4 2 3 2 3 4 2 3 3 3 4 2 2 3 5 2 3 4 3 4 3 1 4 3 3 2 2 4 4 2 3 2 5 2 3 4 2 3 4 2 2 6 3 3 5 3 1 3 5 5 5 3 3 4 3 2 2 2 4 2 3 2 2 2 3 4 1 3 4 1 2 2 3 5 5 7 3 2 3 1 2 2 6 5 4 2 4 3 4 2 2 2 2 17 5 2 2 1 5 4 2 2 8 12 17 17 17 363 363 363 363 363 408 51 149 228 491 491 320 7 473 258 258 258 31 342 224 494 494 368 453 168 180 145 329 329 175 175 81 81 469 416 416 96 453 168 470 365 365 365 330 348 212 300 300 382 313 186 162 232 232 105 105 336 470 432 432 330 330 64 64 77 449 224 300 156 382 245 43 43 345 141 141 281 453 342 168 230 230 230 215 35 74 183 485 286 286 382 245 245 43 364 276 174 174 174 348 348 64 64 212 93 93 93 93 93 301 8 8 255 255 349 349 155 155 148 148 372 372 245 245 458 458 208 190 487 487 278 258 31 342 342 86 105 196 217 473 459 459 271 271 39 433 433 160 168 89 55 322 67 394 76 310 338 338 400 400 400 400 30 324 422 186 162 68 68 115 470 470 120 240 240 314 310 338 400 400 400 400 301 378 345 141 141 281 281 9 221 336 144 106 496 88 319 146 135 339 76 36 377 87 87 416 259 445 180 443 443 240 325 34 44 44 44 251 251 241 431 278 26 302 302 497 497 122 32 401 82 144 498 498 498 498 498 139 302 293 497 497 497 122 393 155 155 165 165 165 165 466 466 448 448 464 255 38 38 162 68 115 273 273 265 265 85 85 146 146 175 81 81 242 203 399 70 65 410 410 410 410 173 280 29 29 406 467 89 446 67 58 72 72 72 496 496 496 215 215 35 96 96 272 449 242 275 275 275 303 195 199 335 188 188 340 340 466 22 283 455 38 162 232 105 105 336 491 336 190 380 288 288 288 328 328 200 303 303 117 48 78 491 491 491 491 421 421 491 201 193 193 193 193 +103-1240-0039 103 721 1 31 27 24 4 34 39 36 5 25 11 8 18 4 37 33 6 22 33 19 33 27 37 14 6 16 5 25 11 6 25 13 37 14 31 19 25 31 1 38 20 34 6 33 38 20 11 17 13 33 5 9 28 1 24 4 34 39 36 19 40 17 
19 33 19 26 5 29 19 25 39 14 40 39 36 25 27 1 18 20 40 31 19 22 31 33 20 1 4 25 11 18 20 19 40 5 25 31 27 31 29 30 8 13 40 18 20 38 5 25 31 38 3 40 1 18 19 40 18 3 30 33 30 5 9 5 23 40 18 19 24 5 17 35 11 20 23 1 4 25 11 39 36 25 27 18 7 11 13 31 29 30 19 33 18 3 30 11 19 33 31 17 3 33 19 9 20 33 19 17 13 33 18 8 14 11 18 13 23 29 1 9 9 5 3 6 3 2 4 1 2 2 7 2 2 2 5 3 3 2 2 2 7 3 6 6 4 3 1 2 7 5 4 3 3 5 3 6 7 19 3 3 3 5 2 2 2 2 3 2 2 2 4 10 24 5 5 3 2 2 2 3 3 2 2 2 5 3 4 1 4 6 7 3 3 2 3 10 4 6 2 3 5 2 4 4 3 7 1 6 1 2 1 5 2 4 1 3 5 4 5 3 2 7 3 3 2 3 4 2 3 4 5 7 7 17 4 2 3 3 4 4 6 2 3 2 2 2 3 2 2 3 2 4 2 7 7 6 19 5 2 1 4 1 5 5 4 5 7 3 5 2 2 1 2 4 3 3 3 2 2 3 3 4 5 2 3 5 2 1 3 3 2 5 5 4 2 3 3 5 5 15 17 17 17 363 363 51 51 491 184 373 373 66 232 68 172 115 273 84 84 16 16 274 274 399 70 473 65 329 329 460 169 169 164 164 164 485 485 485 374 88 89 89 446 212 131 106 111 111 111 111 85 438 58 110 110 202 202 202 402 221 36 108 119 437 405 405 206 178 35 96 96 272 277 191 325 34 180 410 410 410 410 410 410 173 280 29 334 382 59 59 245 335 14 287 284 405 405 206 169 349 352 25 242 242 242 116 94 199 106 426 426 426 426 282 282 388 195 117 335 335 440 145 463 463 463 463 29 382 313 186 162 342 172 115 273 432 432 330 379 379 243 243 243 77 433 433 433 160 112 56 247 247 312 126 292 292 292 292 1 326 326 326 326 101 101 149 149 228 491 491 320 345 152 152 422 422 164 164 106 106 405 405 206 167 35 35 397 152 152 152 314 90 458 445 180 443 443 240 285 44 44 8 259 354 153 153 153 153 387 387 207 207 19 454 417 417 417 417 417 237 237 47 491 47 2 491 2 491 491 2 316 316 316 491 316 316 73 491 289 491 7 217 217 473 329 329 329 329 329 329 164 485 485 485 485 374 141 281 281 9 221 336 445 180 443 240 325 449 176 135 135 200 200 180 230 230 230 230 215 35 192 340 340 340 116 33 33 219 219 219 219 286 286 286 286 334 304 304 304 185 49 323 219 219 152 152 152 236 94 331 84 84 84 84 84 16 274 98 263 13 417 417 417 417 435 225 373 451 451 451 30 30 422 186 162 232 232 68 68 115 273 278 278 178 143 96 96 86 86 86 238 272 41 41 41 41 19 19 454 454 225 225 225 83 83 55 55 55 322 212 34 30 30 30 324 356 356 356 356 281 453 342 242 242 379 379 478 478 68 172 344 344 344 344 344 186 162 54 482 142 105 336 336 190 499 499 499 499 85 85 146 146 464 253 253 253 368 342 342 451 30 30 301 378 43 276 174 174 319 319 348 379 77 77 9 142 397 336 276 276 346 346 346 387 355 355 355 37 185 185 269 433 433 112 427 82 247 312 126 292 292 326 326 326 326 326 326 326 326 101 101 149 149 228 491 451 257 257 257 31 342 142 72 72 437 306 306 306 306 306 396 167 167 457 32 401 259 161 161 487 499 151 481 215 215 29 302 497 497 71 71 342 342 57 57 57 57 203 53 44 44 44 416 239 458 484 484 484 484 314 32 32 259 384 371 213 213 213 286 286 139 139 302 375 375 98 98 13 417 229 491 247 126 126 126 292 326 326 326 408 408 408 149 149 228 491 289 491 83 55 55 55 322 67 212 219 219 152 152 152 236 10 309 331 331 84 84 16 274 88 58 72 268 268 268 268 268 268 274 32 32 401 401 401 384 371 443 443 443 150 150 86 105 105 336 29 29 288 313 285 131 72 72 437 306 306 306 306 306 396 396 24 325 34 177 177 143 77 342 142 221 336 144 180 405 405 206 167 167 35 36 377 377 87 87 8 354 420 420 420 246 246 252 325 87 87 416 458 445 180 443 443 240 325 58 72 72 72 437 265 265 85 85 468 468 468 396 313 24 131 58 72 110 351 139 139 139 139 293 375 375 233 233 233 419 229 491 247 312 15 15 15 15 15 15 193 193 193 193 193 17 +103-1240-0040 103 733 1 12 13 30 40 25 13 37 14 1 13 25 20 9 5 11 20 33 5 9 20 18 4 11 9 5 33 12 27 40 31 33 36 29 19 11 18 4 16 17 30 
27 25 23 19 33 5 23 16 30 13 25 10 9 28 40 1 4 25 11 13 40 31 36 25 13 40 39 36 11 36 17 19 33 18 38 5 25 9 30 27 22 19 25 33 36 39 35 30 38 15 40 5 25 11 33 6 33 31 5 24 34 19 26 1 18 20 40 5 29 5 25 11 6 16 33 5 12 5 23 3 9 31 33 14 22 4 25 14 20 40 14 12 5 31 33 15 33 31 1 4 33 16 14 31 33 24 4 34 39 36 31 5 17 21 13 31 33 19 11 17 19 33 19 26 5 18 27 24 9 28 1 9 5 33 8 31 13 11 25 27 16 23 4 33 5 12 4 33 1 17 3 1 3 3 3 3 2 9 1 4 4 3 2 3 2 3 3 2 2 4 6 5 2 2 2 2 3 3 3 6 3 4 3 4 3 6 5 6 2 2 4 2 2 2 2 2 2 6 2 1 4 5 3 9 6 18 5 1 2 2 3 5 3 3 3 5 2 2 4 5 3 4 2 3 3 2 4 3 2 4 3 2 2 3 1 2 3 3 4 7 3 2 2 1 4 4 5 3 2 4 3 2 6 10 4 3 4 6 5 3 1 3 6 3 3 2 2 2 4 2 4 2 3 2 5 4 3 4 4 5 4 3 2 4 3 6 4 9 14 4 2 5 7 4 3 5 4 4 2 3 4 2 1 4 3 3 3 2 2 3 2 2 2 4 2 5 7 2 3 12 22 3 2 2 6 6 2 3 5 8 5 3 3 4 2 3 7 8 15 17 17 17 296 296 363 52 52 52 52 52 51 51 51 51 184 491 491 412 0 0 222 356 356 281 9 196 196 479 331 463 463 463 463 29 29 382 245 335 14 14 226 226 226 226 209 209 475 475 475 475 475 475 475 324 301 8 354 180 151 240 240 325 41 324 324 422 36 377 87 87 354 420 420 420 324 3 58 72 72 110 110 486 486 486 460 282 37 24 35 259 159 159 159 236 35 198 127 114 84 496 496 274 186 162 482 482 482 482 482 238 6 272 371 485 374 374 374 8 354 29 191 191 191 37 24 131 472 225 72 72 72 110 110 486 486 460 460 460 169 352 352 402 221 401 336 79 79 288 84 496 496 413 348 250 250 81 278 278 285 285 302 302 497 497 349 234 234 234 261 190 380 288 288 330 64 64 76 310 107 447 447 221 336 354 153 153 153 153 387 387 304 304 185 185 269 433 160 112 427 491 247 312 126 292 292 292 292 292 326 326 326 408 408 408 149 149 228 491 412 83 55 55 55 322 212 34 34 253 253 31 162 482 482 115 485 485 374 374 374 339 94 199 253 253 253 253 453 9 219 219 152 152 152 301 236 239 384 371 371 374 374 132 132 416 416 445 445 180 443 240 385 131 133 133 364 276 174 174 174 319 348 33 250 394 212 465 190 380 380 496 496 274 143 458 192 192 340 340 33 394 108 377 123 123 219 222 222 222 222 245 245 43 43 276 109 109 403 403 403 207 318 318 318 49 342 168 89 89 116 33 76 108 119 437 437 405 405 405 206 167 167 457 35 77 68 342 273 231 231 231 203 53 76 198 164 214 214 214 328 200 200 117 229 229 247 126 126 326 326 408 408 391 228 491 491 373 451 30 30 356 356 368 453 168 180 230 230 230 230 230 215 35 35 35 401 89 89 89 446 67 212 131 106 106 284 405 405 206 169 349 402 402 6 272 123 123 216 216 22 283 455 251 241 431 405 405 405 215 215 169 270 86 238 6 300 382 382 245 458 445 445 351 351 365 365 365 365 388 94 199 495 495 406 337 41 41 318 318 49 9 168 157 157 157 467 313 313 216 22 22 283 38 162 232 232 238 6 272 470 470 171 171 171 358 358 358 233 270 270 433 433 433 160 18 112 439 439 439 439 237 237 237 237 237 237 237 237 80 80 491 435 435 412 83 415 415 415 131 393 234 261 261 25 498 498 498 498 498 396 186 39 54 86 238 6 472 472 196 217 217 473 65 486 486 460 460 169 164 164 485 485 485 152 152 422 162 323 224 224 494 494 236 36 310 395 470 470 151 151 150 39 86 342 272 191 191 191 314 90 401 259 445 180 443 240 325 176 135 200 200 199 44 44 44 58 72 72 72 350 350 350 350 350 413 413 53 250 250 212 354 354 153 153 153 387 387 387 207 207 19 454 229 82 247 126 126 126 126 326 326 326 326 326 326 326 326 326 101 408 408 149 391 491 289 289 289 320 159 159 159 240 285 111 111 111 111 438 438 186 162 342 68 273 470 120 240 240 314 196 196 309 479 331 331 84 84 84 16 16 274 274 349 349 234 234 261 425 386 431 376 376 460 167 167 36 108 377 123 123 216 127 114 92 92 92 92 92 92 282 385 385 233 227 419 439 417 417 237 237 237 421 491 421 
421 491 491 128 491 128 193 193 17 +103-1240-0041 103 675 1 12 15 24 15 9 20 6 23 30 8 33 8 24 25 3 33 31 15 19 26 12 13 30 25 3 33 1 9 5 33 25 27 23 5 25 11 5 25 31 33 30 20 33 4 30 5 9 40 16 14 24 20 1 8 31 13 11 1 17 19 37 24 20 5 25 15 33 19 37 9 6 30 25 4 33 23 20 31 33 1 12 13 30 5 23 9 20 5 30 19 31 22 25 27 24 4 33 14 18 36 38 20 17 13 33 1 9 5 33 8 23 16 20 23 1 20 40 20 14 19 25 24 8 24 8 25 11 5 25 11 31 23 20 29 31 7 25 11 14 4 33 25 8 33 31 19 16 38 20 17 13 33 5 9 6 30 25 22 5 25 15 11 20 5 25 1 17 3 4 3 5 2 4 6 4 4 10 3 3 2 3 5 3 5 5 2 2 2 2 4 4 9 7 8 2 3 3 4 5 6 2 3 2 2 3 4 3 2 2 3 5 5 2 2 5 2 3 4 10 4 8 6 4 6 19 3 3 2 4 4 2 6 5 2 2 3 3 4 2 2 2 3 4 6 6 6 19 2 2 1 2 1 3 3 5 5 5 4 4 3 3 4 2 3 2 5 3 2 4 4 5 6 22 3 1 3 4 2 5 4 3 1 9 4 2 4 3 2 3 5 5 9 3 2 1 2 3 4 2 4 3 6 5 3 2 3 2 3 2 6 4 2 2 2 2 2 3 2 2 1 5 4 3 2 4 2 2 6 2 4 1 5 22 17 17 17 296 211 211 52 52 52 363 363 363 408 51 51 228 491 491 320 114 0 0 0 301 399 473 65 476 476 476 171 252 8 420 420 420 324 464 106 297 297 297 297 297 293 293 42 42 42 147 380 499 499 499 428 428 85 85 85 207 207 358 24 34 34 111 111 319 203 53 10 479 307 307 307 307 61 167 167 478 478 68 68 172 115 470 403 403 403 403 135 135 135 200 248 212 127 114 222 222 468 313 313 10 10 479 331 307 307 307 307 426 426 206 206 206 385 24 227 419 419 439 439 225 225 225 225 225 80 80 289 320 159 159 159 159 236 35 196 196 479 331 231 231 231 16 274 274 274 251 251 241 431 431 319 319 348 64 64 212 34 242 242 116 394 478 162 482 482 238 6 161 487 487 213 213 252 252 335 14 411 145 145 486 486 468 468 467 467 467 134 215 8 270 270 86 9 142 393 155 332 332 332 245 399 217 429 429 429 429 246 246 246 19 19 454 454 225 225 417 417 80 80 412 287 111 111 111 438 438 186 162 342 68 115 273 470 120 120 120 37 24 24 404 427 229 491 247 312 126 292 292 292 326 326 326 408 408 408 391 228 228 289 289 289 320 209 445 278 278 278 314 314 196 217 429 429 429 429 464 464 44 44 44 10 10 10 309 479 331 171 171 171 252 325 34 494 173 402 402 221 259 354 354 153 153 153 387 372 396 285 415 415 415 415 457 26 251 241 431 444 444 213 213 246 358 358 39 433 433 86 86 6 6 227 419 439 417 417 237 237 237 237 491 491 491 47 47 491 491 491 316 316 316 73 73 80 435 412 114 0 139 139 139 293 293 8 420 420 420 420 464 44 44 44 44 42 42 147 147 380 288 278 278 278 271 271 39 342 86 105 105 144 472 196 196 331 331 231 231 274 399 217 473 65 486 460 240 285 300 382 245 58 72 72 489 489 374 132 132 8 152 152 152 152 324 416 458 445 445 180 120 120 120 37 385 233 227 419 427 229 491 247 312 126 292 292 292 292 292 326 23 23 326 326 326 101 101 101 149 149 228 491 289 320 159 159 285 285 106 111 111 284 481 293 169 349 205 205 25 485 485 485 139 139 293 497 335 14 411 411 213 213 213 213 213 318 368 368 453 168 41 324 485 382 406 467 467 340 340 340 116 33 250 70 46 46 46 46 438 438 399 217 70 65 480 480 480 480 480 480 85 299 299 299 299 339 64 212 89 89 322 116 394 478 478 232 232 68 26 26 81 444 213 252 215 129 401 478 232 68 68 115 273 470 315 315 315 450 450 413 64 64 131 300 382 382 467 415 415 236 35 196 196 479 331 265 428 428 85 146 358 233 270 342 224 118 118 118 118 402 345 152 152 152 458 445 180 443 443 285 34 44 44 8 32 259 354 153 153 153 372 372 467 467 299 394 76 465 445 351 351 116 94 199 331 171 171 171 171 252 325 34 324 324 464 275 275 275 303 48 48 417 417 417 491 237 237 237 491 421 421 421 491 128 128 491 128 491 305 128 128 193 193 17 +103-1240-0042 103 780 1 31 27 19 25 12 20 13 25 11 38 20 11 19 31 8 11 19 11 33 36 4 31 22 24 19 31 19 40 31 29 13 25 31 14 33 19 29 
19 22 5 31 7 33 38 5 25 38 13 25 32 20 38 13 25 33 27 37 14 33 19 17 13 33 18 14 23 19 33 5 23 17 14 23 1 38 20 18 14 11 23 4 31 33 38 20 22 32 20 38 5 40 17 27 19 26 1 31 27 38 20 31 13 25 33 18 14 38 14 11 9 8 30 19 10 14 11 31 29 13 25 31 14 40 16 27 22 31 4 33 22 3 30 24 5 11 20 33 19 9 30 19 26 5 31 5 31 24 3 30 33 1 23 8 22 23 20 9 28 5 37 5 9 7 33 13 25 14 19 23 13 37 5 25 1 38 20 11 19 31 8 11 19 11 12 4 33 38 35 11 9 20 12 5 9 13 31 33 15 21 1 23 7 4 3 3 3 5 3 3 3 2 3 3 2 6 6 2 4 1 3 4 6 5 3 2 2 4 2 2 3 3 3 3 5 4 3 2 3 3 3 4 3 7 4 5 3 5 2 2 3 4 3 3 3 1 4 4 3 2 3 2 3 3 2 4 3 3 2 2 3 1 3 7 6 31 4 3 3 5 3 3 4 4 3 2 4 3 4 2 3 2 3 4 4 4 9 12 7 3 3 3 6 2 2 2 5 4 7 4 3 3 6 2 3 4 2 4 2 2 3 4 4 3 3 4 6 3 2 2 2 5 2 3 3 1 3 4 3 2 3 1 2 4 1 4 3 6 3 4 3 3 4 5 5 5 4 2 4 11 1 3 3 2 9 10 2 3 3 3 4 3 3 2 6 27 4 2 2 3 5 7 2 4 3 3 2 4 1 2 2 3 4 1 2 3 5 6 3 5 8 7 17 17 17 296 305 317 317 491 317 491 317 461 491 461 461 435 435 491 435 435 491 491 435 289 373 66 68 115 273 273 84 16 88 88 109 340 340 340 466 466 22 283 448 448 448 464 464 432 432 432 330 330 388 195 64 131 133 345 152 152 152 422 314 239 371 490 490 38 342 68 115 273 106 265 265 265 85 146 146 325 34 191 191 191 37 314 36 377 87 87 87 14 14 145 145 376 376 460 460 150 150 342 105 221 336 96 196 217 473 258 258 31 342 224 494 494 494 31 162 232 86 105 336 354 470 432 432 330 379 64 77 77 224 224 300 334 334 59 313 313 36 377 87 87 87 129 74 74 351 278 416 416 144 180 180 151 240 368 453 342 168 180 113 113 113 113 450 167 35 131 133 133 364 364 276 174 174 174 174 348 348 195 195 250 250 345 409 409 409 116 64 76 310 338 400 400 30 301 378 43 345 109 109 330 330 64 76 449 449 180 410 410 410 410 8 29 29 382 313 236 36 377 87 87 416 458 445 180 443 240 385 131 58 156 156 156 156 313 313 251 251 81 431 278 285 26 302 302 497 497 416 458 144 498 498 498 498 498 134 302 375 375 98 98 13 229 229 491 312 312 126 292 292 292 326 326 326 326 326 326 326 326 326 326 326 326 326 326 326 101 101 101 101 149 149 228 491 491 320 152 152 152 422 58 58 72 498 498 498 498 396 313 314 35 26 241 241 376 376 376 460 169 150 86 86 6 272 472 397 354 109 213 213 213 358 143 458 96 99 338 400 400 400 301 378 8 141 141 281 281 9 221 221 144 180 84 84 496 88 88 176 176 176 328 328 200 117 117 454 454 439 78 491 491 312 126 126 326 326 326 101 408 408 149 228 491 373 66 66 115 273 84 84 16 43 43 345 152 152 152 422 162 68 68 115 273 273 432 330 330 64 131 131 183 156 156 156 156 156 156 245 43 43 364 276 276 109 498 498 498 59 396 313 24 131 472 259 354 62 62 62 62 438 438 42 147 380 288 329 329 36 107 395 300 382 313 314 478 478 478 172 105 336 470 432 432 330 330 33 394 77 54 107 395 382 382 313 186 31 54 142 393 336 25 25 496 496 496 496 274 215 233 270 270 342 224 415 415 325 472 458 144 27 437 437 306 306 306 396 396 53 53 469 469 24 325 41 41 41 324 422 36 108 377 87 87 8 239 190 380 288 360 360 200 200 464 459 271 31 342 224 44 44 38 162 232 232 482 105 105 196 70 65 65 306 306 306 396 396 385 131 472 225 225 225 225 225 225 7 251 241 431 266 266 266 266 146 178 35 35 401 26 359 359 166 166 324 301 8 129 354 354 153 153 153 153 387 387 387 207 464 464 464 69 130 130 280 255 255 236 8 354 180 113 113 113 113 113 450 167 167 457 401 401 401 75 108 119 351 351 351 432 330 388 199 199 495 495 406 467 134 302 251 251 241 431 443 443 443 173 280 29 275 275 275 303 303 303 48 13 229 491 491 312 312 312 292 292 292 292 292 21 21 21 21 21 21 21 21 21 408 408 149 149 149 491 491 491 320 152 152 152 422 143 384 490 490 490 31 342 68 115 273 470 265 265 428 85 146 
146 325 325 191 191 191 191 314 314 198 127 114 114 92 92 92 167 457 364 345 389 389 314 129 259 354 420 420 420 301 216 22 283 455 236 259 354 180 443 443 443 169 150 150 39 342 86 238 272 371 470 93 171 171 171 358 358 233 310 107 107 112 439 417 417 237 237 128 193 193 193 +103-1240-0043 103 737 1 27 23 11 19 25 5 16 33 19 9 20 5 37 31 5 24 39 36 31 19 25 11 36 19 26 10 6 30 40 30 8 33 6 16 1 5 25 11 39 5 26 19 25 5 16 33 5 9 20 33 30 15 25 11 5 29 30 3 29 14 1 38 20 24 20 25 33 19 17 19 37 19 24 5 17 35 11 18 27 24 4 25 11 31 22 36 23 19 26 1 38 20 18 4 11 5 33 13 23 5 17 30 4 24 16 14 24 19 31 19 40 4 23 19 17 40 4 25 11 14 31 29 13 25 31 14 33 5 11 15 1 12 5 24 15 23 24 4 25 9 30 6 33 19 33 16 14 24 12 5 31 33 15 32 5 25 1 31 15 19 26 12 15 38 14 22 5 24 19 26 3 25 12 5 16 8 37 34 14 11 20 33 30 15 25 33 5 25 8 33 1 7 6 5 2 2 3 4 3 3 1 4 6 2 3 6 4 4 5 5 5 2 2 3 4 3 4 6 7 5 7 3 5 3 9 9 2 6 1 2 4 4 3 1 3 2 3 2 2 1 4 5 3 4 2 3 5 7 2 3 4 8 27 3 3 4 3 3 2 2 3 1 2 2 3 3 5 2 2 5 6 5 4 2 3 5 2 6 2 2 6 11 5 2 3 2 2 2 6 2 3 3 1 3 5 4 3 2 3 3 3 3 3 3 3 2 2 3 4 2 2 3 4 3 1 5 3 3 3 2 2 13 18 3 2 4 5 4 3 4 3 4 2 4 3 3 2 3 2 2 2 2 3 3 4 6 2 5 12 7 7 1 5 2 4 3 3 6 3 3 2 4 3 2 2 2 6 7 2 6 3 2 4 6 3 5 2 3 2 3 9 6 6 17 17 296 296 296 184 184 412 209 287 424 424 424 424 424 274 274 122 285 34 34 242 116 479 331 230 230 230 169 349 402 96 36 377 87 87 87 129 354 420 420 420 420 246 3 464 223 223 130 402 478 232 232 232 172 115 273 231 231 231 231 203 53 53 219 219 219 219 219 485 374 374 132 132 186 39 54 342 224 89 340 116 33 394 212 384 371 374 374 88 88 176 176 135 200 200 248 76 465 310 107 395 395 441 441 153 153 153 182 372 372 372 372 304 304 185 185 269 269 9 142 97 397 336 147 380 499 499 428 85 146 146 325 34 106 106 106 426 426 426 426 206 169 169 352 352 352 352 352 352 97 97 225 225 225 83 55 55 55 322 67 64 212 219 219 219 464 180 180 319 319 348 200 464 242 116 94 331 230 169 169 402 402 6 377 87 87 420 420 420 422 422 129 310 161 161 487 487 288 288 290 290 434 434 339 64 212 131 180 230 230 230 167 167 457 401 401 491 190 190 190 488 488 488 405 206 215 215 35 29 334 334 59 59 452 452 263 229 491 247 312 126 292 292 292 1 1 1 1 21 21 21 21 21 21 21 260 260 260 260 391 391 391 491 491 320 345 152 152 152 301 399 217 473 360 360 360 434 339 64 64 108 377 87 87 416 445 485 278 173 280 57 57 57 53 473 44 44 44 416 129 259 144 484 484 484 285 131 58 72 72 72 437 350 350 350 350 350 413 203 381 335 335 14 440 145 194 446 446 33 394 478 478 482 482 482 482 105 336 208 441 153 153 153 182 182 175 81 176 176 328 328 303 117 48 417 417 417 417 237 237 237 491 47 80 491 80 491 7 7 152 152 152 58 58 110 110 254 254 240 34 44 44 236 36 108 119 119 351 486 139 175 175 81 81 469 416 8 79 380 288 288 365 365 282 203 203 53 394 393 155 155 332 332 165 399 217 473 258 258 258 31 342 224 494 494 368 453 168 168 145 329 329 329 175 81 81 469 416 416 453 453 470 365 365 365 365 388 64 212 300 382 382 313 186 54 54 105 336 354 470 432 330 379 379 77 77 54 224 300 334 313 236 36 377 377 87 236 236 93 93 93 93 93 93 207 207 207 207 19 454 229 247 247 126 126 126 326 326 326 326 326 326 326 326 101 101 149 149 491 289 491 127 5 5 455 399 217 473 65 290 290 171 139 139 139 293 293 399 217 65 136 136 136 136 282 388 33 394 32 259 354 190 380 499 405 405 206 206 285 449 34 277 277 24 314 393 155 155 165 165 165 165 466 22 22 283 38 162 342 238 6 272 470 470 171 171 171 358 99 436 436 60 60 298 298 303 303 117 48 229 491 247 126 126 326 326 326 408 408 408 149 228 491 373 66 68 68 68 273 470 403 403 403 403 207 135 
135 135 200 200 248 212 127 0 0 0 0 378 378 347 347 347 347 245 143 458 144 27 437 437 319 319 319 53 53 176 176 135 328 200 200 199 125 125 125 125 348 466 283 455 38 349 234 234 261 25 346 265 265 85 85 146 146 438 349 349 234 234 261 164 273 498 498 498 313 285 34 41 324 324 422 143 259 161 161 161 487 487 288 290 290 290 434 434 434 339 394 36 377 377 87 236 10 479 331 331 428 428 428 428 207 207 358 358 233 465 227 419 439 78 421 491 491 193 193 17 +103-1240-0044 103 819 1 31 27 24 4 34 39 36 38 13 25 33 5 9 30 8 33 30 19 37 14 33 5 24 20 33 18 19 24 1 24 19 31 19 40 31 29 13 25 31 14 38 5 23 11 30 3 29 19 24 6 16 12 13 30 1 5 37 22 6 30 31 1 32 20 17 27 40 3 25 33 5 38 8 33 31 4 25 11 40 31 33 15 32 5 25 18 14 31 13 23 16 1 24 19 31 19 40 30 15 10 5 23 29 30 8 11 19 11 18 14 31 13 23 16 3 25 6 23 38 20 40 31 29 20 22 19 26 18 14 24 8 25 11 1 32 20 29 30 5 31 20 11 5 11 33 19 31 29 20 22 19 33 25 7 1 18 4 37 19 26 5 21 5 31 33 19 11 18 14 24 13 25 33 5 23 4 33 5 33 36 11 33 19 12 19 31 5 24 15 40 19 26 29 20 31 5 37 25 39 36 40 1 8 7 4 4 6 3 2 4 3 2 2 4 2 4 1 6 4 2 3 2 3 3 3 3 3 2 3 2 5 23 3 2 5 3 3 5 2 3 4 4 3 2 2 2 3 2 3 4 2 3 5 5 2 2 9 24 4 2 3 3 5 6 11 5 5 3 7 4 5 2 2 3 4 5 4 3 5 2 2 1 2 3 3 4 2 1 4 3 5 3 5 9 53 3 3 4 2 4 3 3 3 3 3 5 3 4 2 3 2 4 4 4 3 4 4 1 3 6 4 3 3 2 5 3 3 2 2 3 4 3 3 6 3 4 18 7 3 2 3 2 5 3 2 2 3 2 3 3 3 3 2 3 3 2 11 5 3 4 2 2 4 3 5 3 4 3 2 2 4 2 6 2 2 2 2 4 6 2 3 5 5 1 3 3 2 2 4 2 4 7 4 2 4 4 4 4 2 3 2 4 3 12 12 17 17 17 296 296 184 184 184 435 435 66 172 115 273 273 84 344 16 274 399 399 473 65 486 486 486 460 460 169 164 164 164 485 485 485 301 378 43 364 109 109 189 330 330 64 76 465 377 123 123 236 32 259 354 190 380 499 428 428 85 146 146 35 35 133 133 147 288 288 278 173 280 29 29 382 313 236 108 377 87 87 399 217 473 213 213 213 252 325 325 183 57 57 57 57 203 381 117 404 13 229 491 247 312 126 292 292 292 292 292 292 21 326 326 326 408 408 408 408 149 149 228 491 320 217 473 258 258 258 342 342 224 494 494 494 258 31 162 232 232 68 68 105 105 336 470 329 329 330 330 379 64 77 342 224 300 300 382 245 245 43 345 109 389 497 497 122 239 161 79 499 499 405 206 215 35 29 57 57 57 203 70 106 426 426 426 426 169 169 352 352 402 198 127 114 114 264 264 264 264 59 59 59 452 263 263 417 417 417 417 170 170 47 47 491 491 2 2 47 2 316 2 491 491 316 316 73 73 289 435 435 83 255 255 130 402 458 144 441 441 153 153 372 372 396 271 186 39 342 323 97 427 247 247 126 126 326 326 326 326 101 101 149 149 491 373 338 400 400 400 400 30 301 416 416 180 180 84 84 496 496 274 71 368 368 453 168 106 426 426 426 426 413 348 64 465 377 123 123 123 43 276 346 346 346 428 85 146 146 252 36 478 66 68 115 470 486 365 365 365 330 388 33 77 77 342 68 238 6 272 470 171 171 252 99 99 436 60 60 116 94 58 58 156 156 156 313 186 186 162 68 115 273 273 279 279 279 279 279 375 375 352 352 352 352 352 352 112 112 417 417 237 237 237 237 237 237 491 237 237 491 237 237 237 362 362 362 362 491 491 362 362 362 491 218 491 218 491 491 211 218 218 366 366 491 491 366 366 366 366 491 366 366 366 366 163 316 316 316 316 73 73 491 320 7 473 258 258 258 31 342 224 494 494 494 31 9 142 397 147 380 329 329 329 329 329 310 107 395 302 302 497 497 122 129 259 190 190 190 488 499 265 265 85 146 146 325 325 34 382 313 285 325 183 156 156 156 156 396 313 186 162 172 115 273 279 279 279 279 279 293 169 352 352 155 125 125 322 94 335 14 14 411 297 297 297 297 297 293 43 345 109 109 109 171 422 186 162 68 68 68 105 105 336 354 213 213 213 143 192 192 135 135 135 200 248 58 156 156 156 156 245 245 399 70 65 480 
480 480 480 85 299 299 299 303 243 227 419 427 56 491 247 312 126 292 292 326 326 326 326 326 326 326 101 101 408 228 228 373 338 338 400 400 400 400 301 143 129 74 190 492 492 492 186 162 342 172 444 444 444 444 252 325 325 191 191 191 314 36 108 87 87 87 38 342 86 105 336 470 213 213 213 143 458 192 277 277 277 314 196 196 479 331 331 315 315 315 315 450 450 450 98 263 417 417 417 225 225 225 72 72 110 202 202 202 202 202 280 135 135 135 200 464 464 255 255 236 239 259 107 395 180 151 151 169 150 150 86 238 6 272 191 191 191 240 58 183 156 156 156 156 245 399 399 217 217 473 432 432 330 348 64 212 449 302 302 497 497 14 14 411 145 145 486 460 240 325 449 469 469 469 236 259 108 449 485 485 374 374 374 37 24 259 377 377 123 123 216 22 283 283 38 162 68 342 224 494 494 399 217 217 473 290 290 171 171 171 252 318 368 342 342 176 176 176 328 200 248 248 76 74 485 213 213 213 213 186 39 342 224 462 462 462 462 402 196 196 398 398 398 398 398 374 374 132 132 185 185 185 323 390 18 112 427 56 56 491 491 15 15 15 15 15 193 193 193 193 17 17 +103-1240-0045 103 783 1 38 13 23 24 3 30 19 23 5 1 8 23 21 5 31 33 13 23 39 36 29 23 15 25 12 5 33 8 34 19 26 22 39 35 30 11 36 19 26 5 24 8 33 20 16 36 23 19 32 34 19 26 1 5 30 19 31 22 20 34 19 26 12 4 33 31 38 5 33 1 39 36 11 27 25 33 25 27 38 5 33 39 35 30 17 13 33 19 26 1 39 35 30 9 30 19 26 19 26 5 31 33 30 15 25 21 10 8 23 11 19 25 33 36 39 6 30 18 7 31 5 25 11 18 27 24 1 4 25 11 39 36 11 27 25 33 25 27 5 31 19 26 17 5 23 34 19 26 5 9 7 33 19 24 25 6 30 38 5 33 18 19 40 11 19 31 29 5 40 19 32 5 25 19 40 23 8 22 25 6 30 38 5 33 31 6 30 33 5 37 29 13 30 5 25 33 31 18 20 18 4 11 1 15 8 5 5 3 2 3 3 3 7 12 9 5 3 3 5 5 3 3 4 4 5 3 8 2 2 3 2 8 5 2 4 3 3 1 4 3 5 3 3 2 4 4 4 2 7 5 2 2 7 3 1 11 12 10 5 2 5 3 4 5 3 8 3 4 3 3 3 5 7 21 4 3 3 4 1 2 3 5 3 3 2 2 2 3 4 3 2 3 6 11 6 2 4 4 3 2 2 3 4 3 5 3 2 4 4 4 5 6 3 3 2 3 3 2 1 4 2 4 5 4 1 2 1 3 6 4 12 7 2 2 2 4 4 5 1 2 2 6 2 7 2 5 1 2 2 4 2 4 2 3 6 3 4 3 2 4 4 4 2 1 2 1 3 3 2 3 3 2 3 3 5 1 2 3 3 3 6 5 4 4 4 4 3 4 3 2 2 2 2 3 6 3 6 2 2 2 2 2 4 4 5 4 6 17 17 17 17 296 363 52 51 51 51 51 491 184 184 491 184 7 7 7 364 276 109 109 109 443 443 139 139 293 293 293 497 399 217 70 473 65 329 495 406 406 467 134 139 139 175 423 423 423 423 423 263 263 417 417 417 237 237 237 237 237 201 237 80 435 435 435 440 287 111 111 111 111 139 139 293 293 293 122 35 310 107 395 395 151 151 31 342 342 86 238 6 108 119 119 351 443 151 139 240 240 219 219 477 477 477 477 477 132 8 259 74 74 425 425 386 386 290 290 290 290 434 434 434 434 434 339 466 212 127 45 45 45 325 34 111 111 111 111 111 438 438 438 422 349 164 164 214 214 214 360 360 200 248 76 465 219 219 152 152 222 498 353 353 313 236 239 371 371 374 374 88 88 176 135 135 135 200 44 44 44 399 70 65 65 428 428 146 146 252 449 449 324 324 422 349 349 234 234 261 25 441 153 153 153 153 132 81 81 459 459 469 99 447 447 447 447 238 336 214 214 214 214 214 328 328 200 303 303 404 404 229 491 247 312 126 326 326 326 101 101 101 149 149 228 491 287 287 44 44 44 44 44 42 42 42 147 147 380 288 278 278 31 342 86 86 105 105 336 485 41 324 324 422 349 164 164 164 214 214 214 214 328 328 200 200 200 200 248 248 248 127 114 92 92 92 92 169 35 77 66 142 397 397 276 346 346 346 355 355 37 37 24 227 419 419 439 78 491 491 312 312 312 312 292 292 1 21 21 21 21 21 408 408 408 408 149 228 491 289 219 152 152 152 152 236 325 371 180 84 84 496 350 167 457 457 479 331 84 84 84 16 274 43 43 276 181 181 181 181 35 449 449 485 152 222 353 353 245 416 458 445 180 443 443 240 325 449 176 176 135 328 200 117 
117 48 414 414 47 47 47 47 491 491 47 491 491 80 491 7 7 219 219 152 152 222 353 372 245 245 245 129 259 354 190 380 288 288 360 360 200 135 135 135 135 200 200 44 44 44 44 162 232 482 482 482 238 336 161 487 288 290 290 290 434 339 339 64 76 107 447 447 6 6 119 351 437 91 91 265 85 85 85 139 139 293 122 122 131 34 340 340 116 33 394 465 377 123 123 219 477 222 222 222 372 372 245 58 72 268 268 268 268 268 268 169 186 269 323 224 242 116 33 58 58 72 350 350 350 350 274 274 203 381 117 229 247 247 126 126 326 326 326 326 326 101 149 149 228 491 412 83 55 55 55 55 322 67 212 219 152 152 152 152 132 236 239 239 371 180 84 84 496 274 274 274 457 196 479 331 84 84 274 88 88 44 44 44 38 232 232 68 68 115 273 278 360 360 200 200 64 212 302 302 302 497 497 349 205 259 214 214 214 214 200 200 464 255 255 8 354 180 113 113 113 113 113 167 285 449 57 57 57 57 203 203 195 10 309 331 157 157 157 157 372 245 245 43 364 364 181 181 181 181 285 449 34 356 281 453 342 6 272 490 490 490 31 9 105 336 336 494 494 494 368 453 168 418 418 418 99 436 436 60 60 298 116 199 356 356 281 31 9 26 26 241 266 266 266 266 266 146 146 358 143 458 192 472 196 309 479 331 157 157 157 157 372 372 245 245 43 364 276 181 181 181 167 167 35 478 478 68 224 273 153 153 153 396 285 285 462 462 462 402 129 259 74 74 351 351 351 351 264 468 468 468 468 467 467 467 11 275 379 379 77 77 342 342 451 30 30 30 30 58 58 110 110 110 486 486 460 460 240 24 131 404 229 247 126 326 193 193 17 +103-1240-0046 103 696 1 25 6 30 18 7 18 20 40 23 8 22 23 20 33 19 33 14 25 7 33 1 38 8 19 33 38 5 40 27 25 23 20 23 4 31 33 38 20 22 8 30 13 11 19 25 12 5 29 15 29 14 18 7 5 24 4 25 4 25 11 18 19 40 38 8 16 5 29 38 13 31 33 5 37 12 20 8 23 5 25 11 1 33 35 22 5 9 28 7 33 5 37 5 25 6 30 16 5 25 5 31 8 23 5 24 4 25 11 18 20 31 13 33 16 8 30 33 5 12 5 18 7 31 4 33 25 8 33 1 31 13 33 19 33 3 25 29 14 29 5 31 24 3 30 19 23 5 1 4 25 11 25 19 30 23 20 9 14 25 33 12 5 24 33 36 5 22 30 19 31 29 19 25 12 13 30 9 13 11 40 1 6 5 6 2 4 6 3 3 4 3 3 4 2 2 2 3 4 5 2 5 8 19 7 6 2 2 2 2 3 4 3 2 4 3 5 4 2 3 4 3 6 4 3 2 3 2 1 2 4 4 5 5 3 6 3 5 7 3 1 2 1 2 3 4 4 5 4 4 3 6 4 5 2 2 1 2 4 5 4 3 2 3 13 3 4 2 3 4 10 4 3 2 2 2 2 6 3 3 3 2 3 4 5 4 3 3 3 2 1 3 5 5 3 3 7 7 4 2 2 1 3 4 5 5 1 4 2 6 5 16 8 3 3 2 6 5 5 5 4 2 4 5 2 1 4 3 3 7 8 6 1 3 2 4 3 3 4 6 4 2 3 1 3 2 4 3 3 5 1 2 5 2 3 1 2 2 3 3 4 6 8 14 17 17 363 51 51 228 491 7 309 479 331 157 157 157 387 372 372 396 313 58 72 110 268 268 268 268 268 274 274 183 451 30 30 30 356 368 342 9 26 251 241 266 266 266 266 178 458 96 26 359 474 474 301 236 87 87 87 87 36 108 119 308 308 308 308 396 313 94 199 180 113 113 113 113 450 450 413 233 227 419 439 78 170 491 312 187 187 187 187 12 12 12 12 260 260 260 391 391 149 491 491 491 7 7 276 346 346 346 265 85 85 146 464 177 177 177 177 133 133 141 141 141 281 342 168 106 350 350 350 350 348 250 359 166 166 324 301 251 251 241 376 376 376 376 460 169 150 342 86 238 272 397 397 109 109 213 213 213 143 458 144 180 106 111 111 111 438 438 42 147 147 380 288 443 240 240 325 34 340 340 116 466 22 283 455 129 74 351 351 351 171 171 252 215 215 259 29 334 334 59 59 245 58 72 72 268 268 268 268 88 88 88 44 44 399 217 217 473 65 136 136 136 136 136 136 136 282 388 94 34 89 340 116 131 183 257 257 257 257 281 9 142 221 336 364 276 346 346 428 428 146 146 349 205 352 106 230 230 230 215 215 35 35 133 364 364 276 109 109 443 443 443 169 150 39 86 86 238 6 272 69 223 130 198 22 448 448 464 106 106 265 85 85 85 146 175 175 81 81 275 275 116 64 131 427 229 247 126 326 326 326 326 326 101 101 149 228 289 
289 491 108 377 295 295 295 295 35 192 44 44 44 8 8 8 354 153 153 153 387 387 387 146 464 464 113 113 113 113 206 285 449 34 69 223 130 44 44 44 94 335 14 411 411 153 372 372 372 396 349 349 234 261 25 242 116 94 199 255 38 31 342 68 115 273 106 265 265 85 85 85 175 175 81 81 203 203 381 404 335 440 55 55 322 67 131 183 451 451 30 30 30 422 186 162 68 68 115 273 189 443 240 385 131 472 393 393 234 234 261 261 25 265 265 265 85 146 146 300 382 382 313 143 36 377 123 123 216 283 283 455 72 72 268 268 268 268 268 169 169 39 342 342 224 415 415 415 314 401 196 479 331 428 428 428 428 358 358 233 36 227 427 427 247 247 312 126 292 292 292 292 292 23 408 408 408 408 391 491 491 373 66 66 68 68 115 273 470 443 240 325 449 277 277 277 277 325 335 14 14 287 284 125 125 125 348 348 195 33 394 76 401 82 74 492 492 492 492 396 215 35 354 459 459 459 271 39 342 86 142 196 70 65 65 495 406 406 467 288 139 139 175 175 423 423 423 423 263 229 229 247 126 126 326 101 408 149 149 491 412 83 83 55 55 322 322 67 10 10 309 479 398 398 398 398 468 468 313 359 359 166 166 166 324 301 301 32 32 32 354 354 498 498 308 313 348 64 76 198 198 114 57 57 203 53 76 465 377 377 123 123 123 88 44 44 44 129 458 208 208 190 487 278 278 31 342 86 105 336 336 354 340 340 116 466 466 114 222 222 222 468 245 8 8 354 470 120 120 330 240 379 243 233 270 270 433 433 160 112 112 56 56 421 491 421 491 491 491 421 421 128 491 128 128 193 17 17 +103-1240-0047 103 644 1 4 25 11 8 25 27 5 25 5 12 14 22 15 31 38 13 30 5 25 5 11 3 29 33 5 11 9 28 39 36 40 11 33 5 31 5 22 12 20 13 17 40 1 12 15 22 35 11 5 25 33 9 30 15 22 18 19 24 5 37 19 33 1 19 16 39 36 18 4 11 4 31 33 24 8 5 11 37 8 31 19 25 12 5 24 4 33 14 1 38 19 10 39 36 11 19 11 5 25 33 1 11 36 24 3 30 19 23 5 1 8 11 18 4 37 31 13 11 16 6 30 24 14 31 20 40 31 15 22 25 3 33 5 34 19 26 22 5 37 31 5 10 5 34 19 26 12 4 33 31 38 5 33 1 15 7 2 2 5 4 6 1 4 4 3 2 6 5 4 3 2 3 2 2 3 3 5 3 2 2 3 4 6 6 3 4 1 2 3 7 3 6 3 5 4 7 6 17 3 5 5 3 2 4 2 2 2 3 2 3 2 2 2 4 2 5 5 16 8 4 3 4 5 1 3 5 4 2 2 6 2 1 3 7 5 2 2 1 2 2 4 4 5 16 4 2 7 3 3 4 2 3 1 4 5 2 2 7 3 2 3 3 3 8 14 10 1 2 1 3 7 4 4 6 3 3 8 3 6 3 4 6 5 4 3 4 6 2 7 1 3 3 2 2 3 3 3 3 4 4 6 2 5 3 5 3 2 11 19 17 17 17 296 296 317 305 305 317 461 491 491 435 435 435 435 287 83 194 194 194 194 322 67 212 34 111 111 111 111 438 438 10 479 331 84 84 88 88 88 44 44 348 10 10 479 331 493 493 493 216 300 300 382 245 143 465 445 351 351 343 343 343 171 358 368 342 9 142 397 336 345 347 347 347 406 467 467 467 340 116 199 255 255 236 236 384 180 106 405 405 405 215 215 96 272 449 191 191 191 314 314 32 401 259 354 153 153 153 387 387 387 146 146 219 219 219 219 485 374 374 368 186 323 323 238 6 272 87 87 87 38 162 68 68 68 68 115 273 151 151 178 178 35 458 96 472 164 198 22 448 448 448 448 464 180 443 443 120 120 120 416 416 233 233 270 49 433 433 433 160 427 247 247 126 126 126 292 326 326 326 326 326 326 408 408 149 149 228 289 491 127 114 0 0 0 0 422 143 458 144 27 389 389 389 389 314 35 196 242 242 33 33 76 465 401 259 354 190 380 288 288 295 143 458 192 183 57 57 57 203 88 69 223 223 223 130 280 277 277 277 277 385 24 227 419 419 439 439 439 439 237 237 237 237 47 47 47 491 491 316 491 80 373 412 412 188 188 118 118 118 118 118 118 402 219 219 152 152 152 132 132 58 58 72 110 110 254 254 240 325 34 145 145 460 460 169 150 342 86 6 472 221 70 46 46 46 46 464 464 255 255 240 314 4 280 106 265 265 265 85 85 146 358 39 39 342 342 224 340 340 466 466 22 283 399 473 65 486 486 460 460 285 449 334 334 382 59 452 229 229 247 126 126 326 326 326 326 326 101 101 
101 149 149 228 491 289 320 345 407 407 407 407 35 36 310 107 447 219 219 219 152 152 152 132 236 32 239 384 371 371 278 278 325 242 242 242 379 243 243 36 227 472 472 221 336 336 384 371 374 374 374 374 132 132 132 399 70 473 65 329 329 42 406 467 134 139 139 175 423 423 423 423 423 452 263 229 259 247 312 126 292 292 23 23 23 408 101 149 149 149 228 491 412 287 111 111 111 111 111 438 438 325 34 202 202 202 402 402 162 232 68 68 172 115 470 470 120 120 240 240 314 314 131 393 393 393 155 155 332 332 332 372 372 245 399 399 217 217 217 70 65 65 498 498 498 186 186 54 54 172 224 41 324 324 422 186 162 232 232 68 68 68 68 115 470 470 403 171 171 252 416 458 401 196 196 309 331 307 307 307 61 167 35 35 108 377 87 87 38 164 164 164 164 164 214 214 214 360 200 200 76 458 192 69 223 223 402 66 342 224 344 344 344 449 449 44 44 44 38 164 164 164 214 214 214 214 214 328 200 200 248 248 212 127 114 92 92 92 92 169 35 77 77 66 86 142 397 397 336 276 346 346 265 355 37 37 24 227 419 419 439 439 439 78 237 170 491 421 421 491 491 491 491 341 15 15 15 15 193 193 193 17 17 +103-1240-0048 103 738 1 12 19 31 21 27 9 40 22 5 24 16 14 33 19 26 31 20 24 11 25 8 12 14 33 36 5 16 13 25 11 25 6 30 33 36 5 23 3 30 24 3 30 19 23 5 1 32 20 25 19 33 19 11 31 33 13 11 5 23 20 6 25 1 8 11 27 25 11 19 25 8 12 13 30 40 31 5 24 34 19 26 19 25 38 5 33 39 36 31 15 30 15 10 5 23 1 8 37 18 4 11 31 5 24 22 38 3 23 24 40 24 8 31 13 23 16 1 9 5 33 24 4 34 39 36 38 5 40 33 13 30 5 9 5 23 31 13 33 3 25 19 33 1 8 22 35 11 31 20 12 4 33 31 27 8 17 15 37 19 25 1 21 6 5 8 7 9 3 3 4 3 3 3 2 2 3 6 6 6 3 4 3 6 2 3 3 4 3 7 4 8 5 3 3 3 4 4 2 5 4 4 5 2 2 4 3 7 14 7 2 3 3 2 3 4 4 3 2 3 2 3 5 7 6 52 9 3 5 2 2 4 2 8 2 2 2 3 3 2 3 3 2 3 2 2 3 3 3 2 3 5 7 4 4 4 3 5 14 9 2 5 2 4 3 1 5 3 4 3 3 4 4 2 5 5 2 6 6 18 3 2 3 3 6 3 3 4 2 2 4 5 2 4 3 1 2 3 5 3 3 4 3 4 5 11 8 4 2 3 5 5 2 7 5 3 5 6 3 6 4 4 6 12 17 17 17 17 296 52 52 52 52 52 52 52 52 52 461 51 51 51 184 491 184 184 7 7 7 127 127 258 258 258 258 258 39 342 342 86 86 238 221 336 336 401 310 107 395 395 329 84 84 496 496 496 496 274 274 215 8 96 270 342 86 221 221 144 27 437 437 319 319 53 53 76 205 29 29 469 469 24 325 176 176 328 328 200 200 195 248 49 49 68 68 68 444 444 444 444 444 434 434 434 339 394 212 131 472 472 196 309 479 331 331 265 265 428 146 146 216 300 300 382 236 36 377 87 87 87 88 44 44 44 349 349 234 234 261 261 25 432 432 432 432 330 330 388 195 195 195 195 195 64 212 131 472 472 221 309 479 157 157 157 157 372 313 236 108 377 123 123 123 88 88 88 255 255 251 251 241 431 431 306 306 306 306 306 306 396 203 53 381 217 70 65 65 329 495 406 406 134 134 139 175 175 423 423 423 423 423 263 263 417 417 417 417 237 237 237 47 47 47 491 491 435 435 435 435 373 338 338 338 400 400 400 30 422 94 199 398 278 278 325 449 191 191 191 314 314 478 478 68 68 68 238 6 371 470 443 443 240 325 26 134 359 359 359 474 474 324 324 464 464 426 426 426 426 426 426 282 388 303 303 48 48 417 170 491 170 491 28 28 28 491 28 362 491 362 362 491 362 491 362 362 40 491 491 211 211 369 369 369 369 369 369 21 21 21 21 21 21 21 21 21 21 260 260 260 260 260 260 391 391 391 491 73 289 412 412 287 287 111 111 111 111 438 438 24 384 371 180 84 84 350 274 167 314 36 384 490 490 490 116 479 331 331 265 265 265 85 85 146 146 216 127 300 382 382 313 186 162 232 172 115 231 231 231 231 53 76 76 164 214 214 214 328 200 200 340 340 116 250 250 345 181 181 181 181 35 131 219 152 152 152 422 186 162 232 172 115 273 470 403 403 403 207 301 301 42 147 147 329 329 329 329 252 143 310 107 395 302 302 302 497 98 98 13 417 
417 417 417 237 237 237 237 237 237 237 80 80 491 491 412 412 287 287 111 111 111 438 202 202 402 58 72 110 110 110 460 240 240 35 77 478 68 224 231 231 231 53 90 90 76 465 208 208 441 441 106 481 481 426 426 426 426 203 53 381 471 49 49 342 142 221 196 46 46 46 46 438 186 162 68 68 115 273 279 279 279 279 279 279 375 169 352 352 352 352 427 229 491 247 126 126 292 326 326 326 326 326 326 101 101 149 149 228 228 289 320 159 159 159 159 159 35 196 196 217 473 329 329 329 329 460 329 164 164 485 485 485 485 423 132 378 43 345 141 141 141 281 9 86 86 6 108 119 119 351 351 264 468 468 468 467 134 134 134 8 100 100 497 497 186 162 68 68 115 273 470 443 240 285 449 34 125 125 125 125 348 199 199 277 277 277 385 227 227 419 439 417 417 237 237 237 237 237 237 80 80 80 491 412 412 287 287 111 111 111 438 438 143 144 389 389 389 314 478 478 68 68 172 267 267 267 267 267 301 216 127 114 92 92 92 92 92 240 240 143 35 36 478 66 172 224 273 84 84 16 88 88 111 111 111 111 438 438 416 416 445 445 210 210 210 171 252 173 173 280 34 120 120 120 120 388 303 303 303 48 417 417 417 170 491 491 491 421 128 128 193 193 17 +103-1240-0049 103 764 1 19 33 31 27 31 13 23 11 5 24 4 34 39 36 31 13 33 31 18 19 40 24 8 25 11 3 25 13 25 20 34 19 26 12 5 33 18 38 13 25 18 20 11 5 40 8 6 23 38 20 40 16 20 23 19 33 31 24 8 11 39 36 33 20 33 19 17 19 37 19 25 1 5 25 11 4 40 16 14 12 5 30 19 31 22 1 12 13 30 40 30 19 31 22 31 19 25 29 30 19 33 20 25 19 30 13 37 30 20 34 19 26 5 9 3 11 20 11 5 40 19 25 12 19 31 38 14 23 11 1 12 13 30 40 30 19 31 22 31 19 25 29 20 29 5 23 40 18 4 37 19 26 10 19 23 11 30 5 25 5 37 12 13 30 27 25 19 16 19 33 22 5 24 40 33 5 12 4 33 1 12 15 11 27 25 33 6 23 38 20 40 33 14 25 7 33 38 13 23 1 13 5 3 6 5 6 3 4 3 3 5 5 3 3 2 6 3 3 1 2 1 4 4 5 2 2 5 3 2 3 3 4 3 6 2 3 2 3 2 2 2 5 3 3 8 5 6 3 3 3 2 6 2 5 3 2 2 3 3 4 3 3 3 3 4 3 3 3 2 2 5 7 24 7 1 3 4 6 2 4 2 3 3 3 6 7 18 3 2 3 6 3 2 5 3 2 2 4 3 3 1 2 2 3 4 4 5 3 3 4 3 3 4 3 4 4 3 4 3 5 4 2 1 2 3 4 4 3 5 4 19 3 2 4 5 4 3 6 2 3 1 3 4 4 2 2 2 3 2 4 2 2 4 4 3 3 2 2 1 2 3 2 1 4 4 9 3 3 3 2 2 2 3 5 3 2 3 2 8 6 17 3 7 2 6 1 3 3 3 3 2 3 5 3 3 5 2 3 3 7 15 17 17 17 17 296 363 363 363 363 51 51 228 491 412 412 177 177 177 177 356 478 66 68 68 172 115 273 344 344 344 274 274 186 162 232 232 172 115 273 273 139 293 293 293 122 122 272 34 242 319 203 53 381 217 217 473 65 486 486 460 169 169 164 164 485 485 485 374 422 186 162 68 68 273 470 443 443 240 71 71 342 224 257 257 257 257 453 9 196 217 70 65 480 480 480 480 299 299 299 339 212 34 106 125 125 125 388 94 199 475 475 475 475 475 475 475 475 422 349 164 164 214 214 214 214 328 328 200 200 248 212 127 45 45 45 45 385 131 133 133 364 409 409 409 409 348 94 183 183 451 451 30 30 301 236 36 384 71 71 71 71 71 71 71 71 71 368 453 342 168 106 111 111 111 111 438 464 106 297 297 297 297 297 43 109 109 109 469 186 39 342 86 142 393 261 25 444 213 213 139 139 251 241 81 177 356 236 71 71 142 221 196 70 46 46 46 438 438 236 239 384 485 485 485 374 374 252 449 449 41 41 41 324 3 143 36 377 87 87 87 416 445 445 278 278 173 280 34 120 120 120 275 388 303 303 303 117 404 78 491 491 312 312 312 292 292 292 292 292 292 21 21 21 21 21 21 21 21 408 408 408 149 149 491 412 412 83 55 55 322 67 212 131 34 253 253 253 253 31 342 342 86 142 221 155 332 332 332 332 332 216 283 455 42 42 147 380 288 278 278 278 271 39 342 433 433 105 105 458 192 419 439 439 417 417 237 237 237 237 237 237 237 237 237 237 491 237 80 80 435 80 491 491 127 114 0 0 222 468 356 281 453 9 142 221 336 147 380 288 278 278 31 342 86 86 105 336 270 270 342 224 
494 121 203 53 394 394 465 259 190 190 487 104 104 325 41 324 324 301 10 309 398 398 398 398 468 245 335 14 411 411 204 204 204 204 204 204 29 337 337 337 324 422 164 164 164 214 214 214 214 214 200 200 464 415 415 236 129 259 354 354 91 91 206 206 285 285 41 41 324 301 236 239 384 71 71 71 71 71 71 368 342 168 340 340 340 466 466 114 258 258 258 31 142 142 397 397 364 109 109 498 498 134 139 375 375 375 122 122 131 427 229 491 247 126 126 292 292 292 292 326 326 326 326 326 101 101 408 149 228 491 491 127 114 222 222 468 468 356 356 281 9 9 142 42 147 147 380 288 278 278 31 31 342 86 105 105 336 458 270 270 224 89 89 203 53 394 465 465 74 485 213 213 252 215 129 354 100 302 497 497 49 342 58 72 110 202 202 202 202 202 280 176 135 135 200 248 248 310 107 107 395 395 106 153 153 387 122 122 161 300 242 242 116 94 199 223 223 130 198 198 222 222 222 222 406 406 467 467 350 350 350 350 350 350 350 413 413 413 195 199 118 118 118 118 402 177 177 177 177 458 144 351 351 319 319 319 71 71 71 71 49 86 238 6 123 123 123 216 216 114 92 92 92 92 92 92 92 282 385 385 131 419 427 229 247 247 126 126 292 326 326 326 326 326 101 101 149 149 228 289 289 491 127 114 0 0 0 0 0 252 252 325 180 106 350 350 350 350 413 413 465 131 106 106 297 297 297 297 43 109 109 109 109 318 31 39 142 6 272 119 308 308 308 308 313 116 94 199 331 486 113 113 167 167 457 364 276 109 109 139 139 139 375 375 98 98 13 417 417 417 417 237 491 237 421 421 491 491 491 491 193 193 17 +103-1240-0050 103 701 1 4 25 11 12 13 25 27 37 5 31 22 27 32 5 19 40 30 8 33 22 23 27 31 33 5 12 20 8 23 5 25 11 1 19 33 19 40 5 25 33 13 40 19 16 38 20 38 14 17 13 33 19 26 18 19 24 16 14 24 19 26 17 23 5 25 11 6 30 12 5 31 33 15 33 31 1 18 20 22 4 25 33 9 20 24 5 10 11 19 16 30 5 25 33 16 14 24 3 30 31 13 23 37 40 1 38 13 23 8 18 27 29 19 33 38 5 23 33 14 25 7 33 6 23 30 8 33 1 31 13 11 24 19 31 19 40 30 15 10 5 23 19 25 5 33 27 25 12 5 33 29 23 15 25 23 20 19 25 11 5 22 15 33 19 11 18 14 29 15 25 16 5 23 11 7 33 31 1 16 9 2 5 2 4 7 4 2 3 4 4 4 5 1 2 4 3 4 2 5 2 4 3 3 2 2 4 6 4 2 2 7 4 5 2 2 4 2 2 2 2 4 4 4 1 3 3 3 3 3 2 3 3 2 3 3 4 2 2 5 5 2 3 3 1 3 2 3 2 3 4 3 6 5 5 17 4 2 4 4 2 1 2 4 5 3 6 2 2 3 2 1 2 1 3 2 2 2 4 4 4 4 3 11 52 4 3 4 7 8 5 3 2 3 1 2 2 6 3 2 5 2 4 3 4 5 7 16 4 3 2 3 2 4 2 5 2 4 3 3 4 2 2 2 6 6 2 2 1 3 6 3 4 3 2 5 3 2 2 3 4 4 2 2 2 4 3 7 4 6 4 1 5 4 10 3 7 5 17 17 17 296 296 363 363 363 363 363 51 51 149 228 228 289 412 83 83 194 194 194 194 194 194 388 388 64 64 131 472 198 127 361 361 361 361 361 388 67 10 10 479 331 331 84 496 496 173 173 280 29 255 38 162 54 482 105 105 336 144 496 496 496 496 274 99 99 436 395 395 50 50 50 31 9 142 397 147 380 499 428 428 146 167 457 35 401 259 208 208 386 386 496 496 496 496 186 39 86 238 6 377 123 123 123 22 448 448 448 464 180 106 265 265 85 85 146 134 175 81 81 275 275 388 303 243 131 419 439 439 225 417 80 80 491 491 209 188 177 177 177 325 356 356 356 281 342 342 168 242 242 116 64 212 34 253 253 368 453 342 168 118 118 118 118 118 205 402 152 152 152 152 378 378 345 347 347 347 313 416 458 445 180 443 240 325 325 176 135 135 200 200 248 183 57 57 57 57 203 53 76 205 155 165 165 165 165 165 335 14 411 411 360 360 360 200 200 248 248 248 441 302 81 81 275 275 116 64 212 131 157 157 157 157 157 216 216 22 283 455 38 162 68 68 6 272 470 470 171 171 171 171 358 358 358 358 270 270 270 433 160 112 427 229 247 247 126 292 292 326 326 326 326 326 326 101 408 408 391 228 491 491 373 451 451 30 143 458 445 445 351 365 365 365 460 460 76 465 465 420 420 420 324 301 399 217 217 383 383 383 383 383 
383 310 310 447 447 6 336 371 278 278 349 349 155 29 242 275 116 394 90 393 155 332 165 165 53 65 353 353 353 353 396 186 162 342 115 273 279 279 279 279 279 279 293 169 169 352 270 433 390 390 18 112 112 439 439 78 56 56 491 491 28 491 491 312 491 341 341 12 12 12 12 292 21 21 21 21 21 21 21 21 21 21 21 21 21 369 260 260 260 260 260 260 40 40 40 40 40 40 163 163 491 491 305 305 491 316 316 316 73 491 491 320 345 109 109 139 139 139 175 175 81 111 111 111 111 438 438 438 58 72 72 72 72 72 437 496 496 496 496 496 215 35 35 354 177 177 177 131 133 345 389 389 497 497 36 108 108 308 308 308 308 308 313 94 199 113 113 113 113 206 285 285 106 106 297 297 293 293 42 42 147 380 499 428 428 428 146 358 358 233 227 419 419 439 417 417 417 237 237 237 237 237 237 201 237 237 201 201 201 80 373 435 435 108 179 179 179 179 179 314 196 217 473 258 258 31 342 224 494 494 494 281 9 142 397 147 147 329 329 329 329 252 310 107 395 302 302 302 497 175 175 81 89 340 94 199 255 255 236 36 119 119 351 351 496 350 350 350 413 413 466 466 22 45 45 236 129 129 82 74 74 425 425 386 386 386 290 290 290 290 434 339 33 359 359 166 166 166 3 14 411 188 121 121 116 64 212 384 469 469 416 143 458 445 158 158 158 158 158 325 34 191 191 314 131 58 156 156 156 156 245 8 129 259 74 74 351 351 290 290 290 290 434 339 339 195 248 90 393 393 155 262 262 100 100 497 497 497 122 239 384 371 180 180 486 315 315 113 450 450 450 167 37 233 270 270 433 390 390 18 112 112 439 439 193 193 193 17 +103-1240-0051 103 767 1 27 25 23 20 11 27 25 33 31 15 8 11 19 11 5 25 38 6 30 25 39 36 19 16 18 20 9 14 25 40 17 30 20 25 17 15 9 5 23 40 11 7 25 14 29 35 33 31 33 30 19 22 25 8 25 19 25 12 5 38 13 23 1 8 18 14 11 5 37 5 22 15 31 27 37 14 19 25 39 36 9 30 5 25 40 38 19 22 38 13 30 5 25 6 30 16 5 25 5 31 8 23 5 24 10 8 23 11 19 11 12 4 33 1 5 25 11 12 5 18 27 23 16 4 24 23 20 11 8 11 19 25 16 19 30 16 5 23 4 17 5 25 20 40 1 27 25 23 20 19 33 38 5 40 5 17 14 23 19 25 12 4 33 19 25 31 33 5 25 31 1 38 13 23 38 14 25 3 33 17 13 33 19 26 5 17 14 23 1 31 13 11 24 3 30 19 23 5 1 5 6 5 3 4 5 5 3 2 7 6 6 2 3 1 2 3 4 2 3 2 3 3 2 2 3 2 5 8 4 3 3 3 4 5 3 4 3 2 4 2 4 8 3 4 4 2 4 5 3 2 3 3 3 3 3 3 2 1 2 3 4 9 19 8 3 4 2 1 2 3 4 6 5 5 5 2 2 4 3 2 5 2 2 5 2 2 3 5 1 2 1 2 2 4 3 3 1 3 2 5 4 3 2 3 5 5 3 3 3 4 3 4 5 13 4 2 1 2 2 7 5 4 5 5 2 3 3 4 6 2 2 3 7 4 3 3 2 4 5 3 1 2 6 8 23 8 2 3 2 2 2 3 1 3 2 3 6 3 3 1 3 2 2 3 3 4 2 2 4 7 25 6 4 3 4 3 2 4 4 3 2 2 2 4 2 4 5 7 18 5 2 2 2 2 3 3 3 7 5 17 17 296 296 184 491 209 287 350 350 350 350 350 350 250 359 359 81 166 166 324 324 422 314 32 239 384 371 180 84 350 350 350 413 413 243 131 472 232 232 232 68 115 273 470 470 403 403 171 464 464 111 111 111 111 438 438 239 371 371 278 242 314 242 242 242 394 133 364 276 276 153 153 387 387 396 348 339 219 219 477 477 477 88 118 118 118 118 402 183 451 30 30 301 32 129 259 354 354 498 498 498 498 498 396 396 242 116 195 471 368 453 9 142 221 144 208 79 288 288 360 360 360 434 339 200 33 248 248 212 445 180 171 171 171 252 252 8 354 100 302 497 497 497 49 453 9 6 384 371 180 315 315 315 315 450 450 450 413 413 94 199 157 157 245 245 129 259 74 190 189 189 236 35 478 478 482 482 482 482 482 238 6 161 487 288 178 178 458 458 192 196 196 479 398 360 360 434 434 339 199 34 340 340 116 466 22 283 455 43 276 109 109 139 139 139 139 375 375 375 375 98 13 229 491 247 126 126 126 326 326 326 326 326 326 326 326 326 101 101 101 149 149 228 289 320 287 111 111 111 438 438 58 110 498 498 498 498 396 285 34 223 223 280 44 44 44 458 445 445 351 351 343 171 171 358 358 39 342 342 224 224 106 
410 410 410 410 173 173 29 29 495 467 467 44 44 116 10 10 398 398 398 398 374 132 236 32 259 354 190 380 499 319 319 348 348 471 471 49 9 142 397 109 109 288 178 178 143 458 208 397 347 347 467 467 44 44 94 14 14 411 153 372 372 396 349 349 352 29 242 116 199 44 44 38 342 342 115 273 106 265 265 85 85 146 175 81 282 203 53 394 90 310 107 395 351 91 91 91 85 85 139 450 293 122 35 401 384 371 278 278 314 314 401 401 127 114 92 92 92 92 167 385 35 227 427 229 247 126 126 326 326 326 326 101 408 149 149 228 491 412 55 55 55 322 67 466 198 5 5 455 38 72 72 72 72 72 437 424 424 424 424 424 497 497 122 349 401 205 261 25 25 365 365 365 365 460 203 53 359 81 81 41 324 324 422 36 371 180 265 265 265 85 146 146 325 34 340 340 116 33 76 76 205 234 234 234 261 25 485 286 286 286 468 245 349 349 205 262 262 100 497 497 14 14 145 145 486 460 460 416 458 242 242 116 199 41 41 41 19 318 185 433 433 433 160 112 427 56 170 491 312 312 312 187 12 12 292 12 12 12 12 12 408 408 260 391 491 491 316 491 491 491 491 412 287 287 350 350 350 350 350 359 359 81 166 166 464 177 177 177 133 133 141 141 141 281 453 168 44 44 416 208 79 498 498 498 498 134 302 497 175 81 340 340 340 466 466 114 92 92 92 240 325 34 121 121 121 379 77 77 342 86 238 6 272 11 11 379 379 243 471 49 433 390 390 18 18 112 439 439 237 237 237 237 237 237 237 305 305 12 260 260 260 260 260 260 260 163 163 316 316 316 316 491 7 7 7 364 109 109 139 139 139 293 293 43 43 345 347 347 347 313 313 94 479 307 307 307 61 167 131 472 401 259 144 445 443 443 240 325 176 135 135 200 464 44 44 44 416 458 144 79 498 498 498 499 499 302 375 375 98 98 13 417 417 417 237 237 237 237 237 237 237 237 491 491 80 316 491 80 491 289 435 66 66 179 179 179 179 314 196 217 70 65 329 329 406 406 467 134 139 139 175 423 423 423 423 423 263 263 229 247 15 193 193 17 +103-1240-0052 103 739 1 4 40 19 16 29 28 40 5 25 19 26 38 13 23 40 38 14 5 29 39 35 30 23 20 16 13 24 5 25 5 25 5 22 3 24 29 23 19 32 24 5 25 33 1 5 25 11 25 3 33 19 9 20 11 30 13 11 19 11 19 25 12 20 22 15 31 5 37 5 9 28 1 8 11 25 13 37 14 11 30 20 24 5 37 33 15 22 19 26 5 17 14 23 33 5 9 30 19 26 5 29 1 8 38 5 25 11 14 4 33 24 19 31 19 40 4 23 5 17 40 4 25 11 14 31 29 13 25 31 14 16 14 11 36 19 26 19 33 1 9 5 33 12 13 30 1 32 20 38 35 11 5 25 33 32 30 19 26 22 16 14 24 5 11 3 29 33 19 26 5 18 27 23 6 30 16 5 25 5 31 8 23 5 24 19 16 32 20 33 35 22 19 33 19 25 33 36 18 14 18 13 11 1 7 6 3 3 4 4 6 3 2 1 2 5 3 4 5 4 4 3 3 4 3 2 4 2 5 8 2 3 2 1 3 2 2 5 2 2 2 2 2 5 2 2 2 3 12 7 2 1 3 4 4 2 2 3 4 3 2 3 2 3 2 2 2 1 5 5 5 1 3 2 4 12 38 7 2 2 3 2 3 5 4 4 2 2 2 5 3 3 2 4 2 7 5 4 2 2 2 2 3 3 6 7 8 7 6 2 2 3 2 2 2 1 3 3 2 2 4 2 3 2 3 3 3 3 2 4 4 2 4 4 2 3 2 4 2 4 3 3 6 10 3 2 3 1 2 8 7 12 8 3 2 2 2 2 1 6 1 2 4 3 2 2 2 2 3 5 2 2 1 4 3 4 2 9 5 3 4 2 1 3 5 5 3 2 1 3 2 3 2 3 3 3 2 2 3 2 4 2 4 4 3 5 6 22 17 17 363 363 51 51 228 209 83 145 253 253 253 453 342 342 118 118 118 118 349 402 221 259 74 74 441 153 153 387 387 146 368 453 342 242 196 309 199 176 135 135 200 200 248 250 364 276 109 109 443 139 139 139 293 293 293 185 49 9 142 397 345 347 347 347 347 406 467 255 255 129 129 74 74 485 485 485 286 286 468 468 468 134 359 359 359 166 166 324 3 422 349 234 234 234 261 25 25 443 443 330 203 53 473 242 242 199 199 89 446 94 199 255 255 143 259 144 27 437 319 319 53 53 76 465 81 81 469 469 99 447 447 221 196 291 291 291 291 291 243 227 419 427 247 247 126 126 326 326 326 326 326 101 149 149 491 412 83 55 194 194 194 388 67 10 10 479 331 307 307 61 167 167 36 108 87 87 87 8 420 420 420 422 32 239 161 161 79 79 288 443 443 240 
325 34 191 191 24 36 34 340 340 340 466 22 283 455 143 458 445 445 351 343 343 171 358 358 39 342 342 224 69 69 130 280 29 44 44 236 8 354 153 153 153 153 387 387 387 207 207 207 454 13 229 82 247 312 312 312 292 292 292 292 292 292 292 292 1 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 408 408 408 408 391 391 289 491 209 287 111 111 438 438 314 196 309 479 463 463 463 463 29 29 382 313 236 32 32 239 161 79 79 288 360 360 360 434 434 203 53 69 223 223 402 221 259 108 119 295 295 295 295 295 458 135 135 135 135 200 200 44 44 44 44 129 401 491 144 79 498 498 498 498 139 302 302 497 122 122 449 87 87 87 8 354 380 288 288 360 328 200 464 230 230 230 230 230 230 215 35 29 419 419 225 225 225 225 225 225 225 225 225 225 225 225 287 287 111 111 111 438 378 43 364 276 174 174 319 348 348 64 212 161 300 382 382 277 415 457 196 217 258 258 258 342 342 224 494 494 453 168 145 329 329 329 175 81 81 469 416 416 96 342 168 470 365 365 365 365 348 64 212 300 382 382 186 186 54 54 105 336 336 74 470 432 330 379 64 64 77 77 224 224 334 382 245 349 155 332 332 332 236 239 384 371 374 374 88 176 176 135 135 200 200 277 277 277 277 233 227 419 439 439 439 225 225 225 391 491 80 491 491 73 491 320 159 159 159 314 35 259 127 114 264 264 264 468 59 59 452 263 417 417 417 417 47 47 491 491 491 435 197 373 338 338 338 338 338 400 400 400 400 95 95 246 246 246 301 378 378 345 389 389 389 314 196 242 242 33 33 310 107 338 161 161 161 487 288 360 360 360 200 200 243 96 96 393 155 165 165 165 53 44 255 236 239 384 180 180 405 405 206 206 35 96 272 176 135 135 200 200 44 44 44 44 72 72 72 424 424 424 424 424 424 497 497 497 335 14 226 82 411 411 157 372 372 396 349 349 234 261 25 242 242 94 199 459 38 31 342 342 273 106 265 265 85 85 85 175 175 81 81 203 53 118 118 118 118 402 338 400 400 400 422 36 108 377 295 295 295 416 458 277 277 277 325 34 340 340 340 116 64 76 108 377 123 123 123 88 88 156 156 156 156 245 58 58 110 110 120 120 120 120 120 37 24 24 404 439 78 229 491 312 312 15 292 292 292 292 292 292 292 21 21 21 15 193 193 193 193 17 17 +103-1240-0053 103 683 1 24 19 31 19 40 30 15 10 5 23 38 35 11 18 4 37 23 8 22 33 19 31 33 15 5 25 33 19 23 24 4 34 39 36 22 15 24 18 27 24 38 19 12 18 19 40 19 24 29 6 30 33 19 11 6 30 16 5 25 1 9 5 33 30 5 16 23 13 22 33 19 26 12 5 33 19 33 38 35 11 9 20 5 17 35 11 33 36 7 14 40 4 33 23 20 31 33 9 19 16 6 30 18 19 40 14 8 37 5 23 1 32 20 22 5 25 22 23 36 11 19 11 33 19 17 27 5 29 12 5 30 27 11 33 36 30 3 9 14 33 9 13 23 40 5 25 11 33 13 23 12 5 25 39 36 40 1 19 33 38 35 11 31 14 33 5 25 23 20 24 15 22 5 31 13 25 31 15 32 5 25 31 13 22 5 25 33 5 25 5 25 1 23 5 2 3 3 4 3 3 3 3 3 2 2 1 2 1 2 3 4 2 3 2 4 3 4 2 2 3 2 3 3 6 3 2 2 5 4 4 1 5 2 2 2 1 2 1 3 2 2 4 2 3 2 1 3 7 3 4 2 6 23 3 3 3 2 4 3 1 3 3 3 3 4 1 3 1 2 3 2 1 2 3 5 3 6 3 5 5 7 8 3 4 2 4 3 4 3 2 2 3 5 2 3 2 1 4 6 6 2 2 5 14 6 3 2 3 2 3 2 4 2 3 2 3 2 3 5 5 4 2 3 5 8 2 8 3 4 4 2 2 4 3 4 7 4 1 2 2 3 3 3 2 2 3 4 7 10 14 5 4 2 2 3 6 4 2 2 1 2 2 4 4 3 3 4 2 4 4 4 5 2 3 3 3 3 2 1 3 2 4 5 6 8 17 17 17 17 296 52 52 52 52 52 52 52 52 461 461 491 461 461 184 491 491 305 305 289 7 217 473 258 258 258 342 342 224 494 494 494 453 9 142 397 147 329 329 329 329 329 143 36 449 302 302 302 497 43 43 345 389 389 389 285 34 202 202 402 402 251 241 266 266 266 266 146 178 35 35 272 87 87 38 162 342 86 238 6 272 470 403 403 464 464 464 330 348 76 76 108 377 139 139 139 497 399 217 217 473 486 486 486 460 460 169 164 164 485 485 485 374 422 143 144 445 210 210 210 210 210 210 203 53 58 58 350 350 350 350 350 203 250 250 345 333 333 220 216 
22 257 281 453 9 168 121 121 53 76 465 74 74 441 153 153 372 372 313 449 449 191 191 24 335 14 14 411 153 153 372 372 372 396 349 352 352 352 275 275 275 116 303 303 48 229 491 247 312 126 126 292 292 292 292 326 326 326 23 23 23 101 408 408 408 149 149 228 491 289 491 354 159 159 159 159 314 133 133 456 456 456 456 349 349 234 261 386 386 151 151 151 178 35 96 36 449 176 176 135 135 200 200 248 212 127 45 45 45 325 177 177 177 177 345 345 389 389 389 129 129 259 420 420 420 420 464 464 44 44 44 416 129 401 401 144 484 484 484 484 314 314 32 401 401 259 108 377 351 374 374 374 132 88 88 106 145 284 315 315 315 450 450 450 372 304 304 304 49 342 342 168 415 415 415 26 26 26 241 241 444 444 213 213 358 39 39 342 142 221 336 354 354 255 38 349 205 155 148 148 148 148 372 245 58 183 257 257 257 453 168 255 255 42 42 147 380 499 499 265 85 85 146 173 173 280 302 302 375 497 98 229 82 247 126 126 126 326 326 326 326 326 326 101 101 149 228 491 373 338 400 400 400 400 30 143 144 27 121 121 121 33 394 76 208 208 386 444 444 374 374 252 325 34 191 191 191 314 36 36 377 87 87 87 416 416 180 84 84 496 88 88 230 230 230 230 215 215 35 401 198 198 283 283 455 42 42 147 380 380 288 496 496 496 496 274 274 274 37 24 24 36 377 377 377 123 123 272 123 123 123 42 42 147 147 499 499 405 405 206 215 29 469 313 314 32 401 401 401 354 180 180 443 139 139 139 139 375 375 375 375 185 49 342 342 168 89 116 33 394 76 108 119 351 351 139 139 293 293 122 216 283 283 455 116 10 398 398 398 398 398 374 374 132 132 132 185 185 269 390 390 390 18 18 112 439 237 237 237 237 237 237 491 47 491 47 491 491 435 435 435 289 491 209 177 177 177 177 131 133 133 345 389 389 389 314 129 478 66 68 68 115 273 498 498 498 240 240 35 35 359 359 359 166 166 166 301 301 217 217 473 476 476 476 476 143 458 192 44 44 38 342 342 115 273 432 432 379 379 394 77 68 68 115 418 418 418 418 418 99 99 436 436 60 298 379 379 471 478 66 342 115 273 151 178 416 458 192 242 116 64 76 108 377 123 123 116 10 479 331 331 319 319 319 282 388 303 303 117 48 229 247 15 15 15 193 193 193 17 +103-1240-0054 103 816 1 5 25 11 24 19 31 19 40 30 15 10 5 23 1 11 19 30 23 20 23 5 37 11 33 5 24 15 22 5 31 13 25 31 15 32 5 25 1 31 27 32 20 33 35 22 18 14 31 13 23 16 5 38 15 31 5 24 38 5 33 5 24 14 19 23 5 40 30 19 23 20 16 1 16 6 30 12 5 23 4 33 14 16 13 23 33 18 14 11 7 33 31 5 25 11 16 19 30 40 30 19 37 8 37 19 26 5 25 11 14 12 20 19 25 16 23 36 5 25 31 5 37 24 19 31 19 40 30 15 10 5 23 40 29 13 31 5 24 19 40 5 24 1 38 13 23 5 37 6 23 34 19 26 40 12 5 33 1 13 37 14 38 14 6 30 38 19 23 9 20 1 19 21 4 22 39 36 23 15 33 19 11 24 19 31 19 40 30 15 10 5 23 38 19 25 32 20 38 5 40 31 15 16 23 20 7 33 19 25 12 5 23 15 25 1 7 6 2 3 2 2 4 2 5 3 3 4 4 5 3 3 4 3 3 5 5 4 4 1 2 2 2 3 3 2 4 2 4 4 4 6 2 4 20 6 3 3 3 4 4 2 1 3 4 2 3 3 4 4 10 8 3 4 2 3 5 2 2 6 3 4 3 5 2 2 4 7 7 4 6 2 3 1 4 4 4 3 3 5 3 3 2 4 3 5 6 5 2 2 2 3 3 5 3 4 3 2 3 8 2 3 4 3 2 3 1 3 3 3 4 4 3 1 2 3 3 2 3 3 2 5 2 4 2 3 3 3 2 3 4 2 5 3 2 2 4 2 5 43 3 1 2 2 4 6 3 8 2 7 2 2 3 3 1 6 3 4 9 9 4 4 5 3 4 3 8 10 5 5 3 4 2 1 3 3 3 2 3 2 2 4 2 5 2 4 3 4 2 3 3 2 3 3 2 2 2 5 5 4 2 5 4 5 2 2 1 2 5 6 6 9 17 17 17 363 363 51 51 491 412 412 55 55 55 322 67 33 250 217 473 258 258 258 31 342 224 494 494 494 281 9 142 397 147 147 329 329 329 329 329 329 36 310 107 395 302 302 302 497 497 122 122 401 401 401 401 401 384 371 371 286 286 286 286 313 134 359 359 166 166 166 166 301 301 251 251 251 241 266 266 266 266 266 173 173 402 402 6 108 377 87 87 217 473 476 476 476 143 458 192 44 38 68 68 115 273 432 432 330 379 394 77 342 342 
115 470 418 418 418 418 99 99 436 436 60 60 298 303 303 48 48 417 417 237 237 237 491 237 2 491 2 2 491 491 491 435 435 491 491 435 435 491 289 373 66 66 115 273 344 496 186 99 400 400 400 30 422 143 36 108 295 295 295 295 295 458 192 156 156 156 186 186 54 172 115 279 279 279 279 279 349 352 29 44 255 255 43 43 276 109 109 403 403 403 207 207 207 207 19 3 454 225 66 66 68 68 68 115 273 231 231 231 231 53 250 250 345 346 426 206 167 167 457 36 108 377 123 399 70 65 65 329 42 42 147 380 288 256 139 175 175 423 423 423 423 271 368 269 142 142 397 147 456 456 456 251 251 241 444 444 444 213 246 246 358 358 173 352 352 352 112 225 225 225 225 225 225 225 373 393 155 155 155 332 332 332 313 216 216 5 5 455 455 251 241 431 486 376 376 460 460 449 449 300 382 382 245 349 349 205 261 25 180 189 139 139 293 122 122 131 183 156 156 156 382 313 313 236 239 239 384 180 180 486 315 113 113 450 450 167 35 270 270 342 224 340 340 340 33 394 76 393 205 261 25 485 286 286 286 286 304 304 304 49 447 142 397 147 456 456 456 456 173 280 106 265 265 265 265 85 85 146 146 173 280 176 176 135 328 200 200 199 89 319 319 348 33 64 212 300 123 216 198 448 448 448 448 464 121 121 121 53 394 76 155 425 425 386 134 88 88 11 11 11 379 471 49 342 168 69 69 223 130 129 196 196 217 473 258 258 258 31 224 224 494 494 494 281 142 142 397 147 329 329 329 329 329 36 449 302 302 302 497 497 31 142 221 336 74 351 351 443 443 150 150 342 342 224 494 494 203 53 459 459 459 368 453 342 168 275 203 203 381 48 13 13 491 491 247 312 312 292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 260 260 260 260 260 260 260 260 260 260 260 391 391 391 491 491 73 491 320 345 109 139 175 175 81 223 130 280 280 106 297 297 297 297 297 297 293 293 349 349 352 164 164 164 164 164 214 214 214 214 200 200 200 200 471 471 49 453 198 114 45 45 385 457 14 401 226 82 209 463 463 463 463 463 280 29 382 382 382 245 245 43 43 364 364 276 276 109 347 347 498 498 59 59 59 245 14 14 411 157 157 157 372 372 245 245 43 43 364 276 109 109 329 139 139 293 497 122 8 354 420 41 41 41 19 19 454 454 417 417 417 417 237 47 80 491 80 491 435 435 209 188 177 177 236 36 107 395 180 486 486 460 178 178 458 192 485 469 134 175 158 158 158 158 158 325 449 191 191 191 314 314 196 217 473 258 258 258 342 342 494 494 494 281 9 142 397 397 147 329 329 329 329 329 143 310 107 395 302 302 497 497 43 364 345 409 409 409 116 314 76 465 400 400 400 301 378 345 141 141 141 31 232 232 68 68 115 273 470 171 171 171 252 349 349 402 26 359 166 166 166 324 464 464 113 113 113 113 169 167 36 449 34 340 340 466 466 22 283 455 497 251 251 241 431 431 290 290 290 434 434 339 339 117 404 13 229 491 247 15 15 15 193 193 193 17 +103-1240-0055 103 562 1 19 33 11 5 40 30 20 23 20 31 20 24 13 40 19 16 8 24 5 31 33 9 20 11 30 20 24 19 26 1 38 13 23 8 24 31 3 30 20 16 6 30 12 4 33 29 36 30 39 5 26 38 5 25 4 25 11 25 27 24 19 31 33 15 22 1 24 4 34 39 36 5 25 11 24 3 30 19 23 5 11 27 25 27 13 25 20 34 19 26 5 9 7 33 10 19 23 11 30 5 25 1 5 25 11 12 15 23 19 22 31 29 13 22 33 19 24 33 19 9 20 38 8 40 14 5 25 11 31 33 13 11 20 14 12 5 33 18 19 40 27 25 17 30 4 25 16 3 12 14 1 12 6 2 3 3 6 3 2 4 3 7 4 3 1 4 2 4 3 3 2 3 1 2 2 4 2 4 2 2 7 47 3 2 4 4 3 6 2 4 3 4 1 2 2 4 3 6 4 2 5 4 5 3 2 2 2 2 2 4 3 2 3 5 3 5 7 15 5 5 4 2 5 1 2 1 2 1 5 2 3 3 5 4 5 8 1 4 3 3 2 3 2 2 7 2 4 2 3 3 2 2 5 5 6 1 2 1 3 3 3 2 3 3 2 4 3 2 3 3 1 2 4 6 4 6 3 2 2 2 4 4 1 3 3 3 3 2 1 2 1 3 7 5 3 2 4 5 4 4 2 7 9 17 17 17 363 363 363 51 51 228 184 491 491 320 188 177 177 177 177 143 401 82 384 71 71 71 71 71 453 9 142 221 336 
155 487 288 485 278 26 359 166 166 166 166 422 162 232 232 68 68 444 444 444 360 360 339 53 53 473 253 253 453 342 168 118 118 349 402 25 111 111 111 438 399 70 65 319 169 150 342 105 221 336 420 420 422 236 239 161 79 79 288 288 360 360 360 203 53 176 176 328 328 200 303 48 13 491 491 312 312 312 312 312 292 292 292 292 292 292 292 292 292 292 292 21 21 21 21 21 21 21 369 21 21 21 21 21 21 21 21 21 21 21 260 260 260 391 391 391 391 491 491 491 320 320 346 346 84 139 139 175 175 81 111 111 111 438 203 53 478 478 232 232 172 115 106 106 153 153 372 372 337 337 337 301 349 155 155 332 332 332 240 216 114 92 92 92 167 457 35 401 259 74 74 441 441 441 153 153 153 372 372 396 313 219 219 219 219 180 180 319 319 348 348 248 250 250 276 174 174 174 388 94 199 89 89 446 116 10 10 309 479 331 84 84 496 399 399 473 65 459 31 31 342 86 86 238 6 470 470 470 171 171 171 358 24 458 192 419 419 439 439 417 237 237 237 237 237 237 237 237 237 237 237 237 237 80 7 7 217 473 65 329 486 460 460 169 169 164 164 25 485 485 485 378 378 88 89 89 116 33 250 70 65 329 329 245 42 147 134 134 139 175 175 423 423 423 423 314 314 239 384 371 180 84 350 350 167 457 309 479 331 84 84 88 88 14 14 411 475 475 475 475 475 475 475 475 422 349 164 214 214 214 214 200 200 255 255 8 354 113 113 113 113 450 167 167 457 36 310 107 107 395 395 106 153 153 122 122 285 300 300 300 275 275 94 117 404 13 414 80 80 491 491 412 412 83 55 55 55 322 67 64 212 114 0 0 139 139 175 81 154 154 154 458 96 66 68 105 105 336 470 151 151 178 35 96 401 36 272 57 57 57 203 64 394 76 377 87 87 87 420 420 420 420 301 43 364 276 346 346 265 265 265 85 146 146 368 453 9 300 300 382 406 467 89 89 446 33 394 478 68 68 68 238 6 272 470 470 443 240 325 41 324 324 286 459 459 469 216 198 114 242 446 94 199 257 257 257 453 168 106 350 350 350 350 350 413 195 195 33 90 32 465 208 79 380 288 365 365 365 365 388 348 64 76 90 393 261 25 91 91 91 91 493 216 300 334 334 59 452 263 229 247 126 126 326 326 326 326 193 193 17 +103-1240-0056 103 715 1 19 33 31 20 24 40 5 25 22 4 25 20 33 19 34 19 26 22 5 37 5 10 8 23 11 4 33 17 30 20 25 17 15 9 5 23 40 31 5 24 18 7 1 12 13 30 40 25 13 37 14 9 19 25 38 5 25 12 13 30 16 6 30 24 4 34 39 36 5 25 11 24 3 30 19 23 5 38 14 17 30 27 25 5 29 38 13 25 12 5 25 39 36 18 7 31 38 5 40 9 19 23 33 1 19 16 12 15 13 37 14 38 14 10 19 23 11 30 5 25 1 38 19 10 19 40 18 3 30 11 33 5 9 19 23 20 37 38 13 25 38 5 25 23 35 22 31 4 33 12 5 24 1 8 38 35 11 5 25 33 9 20 19 25 12 4 33 6 30 16 5 25 40 32 36 40 16 14 13 25 20 34 19 26 1 11 5 5 5 5 4 2 6 7 13 2 2 5 3 2 2 2 1 2 2 2 2 8 4 3 3 1 3 3 2 4 3 2 5 2 2 3 2 4 3 3 2 9 21 3 1 2 3 2 1 4 2 2 4 3 4 3 3 2 4 5 6 2 3 4 5 4 2 4 2 2 1 2 1 5 3 4 6 3 2 4 3 5 4 3 5 2 3 2 2 2 2 3 4 5 5 4 2 2 3 3 2 6 8 16 6 3 2 6 3 3 3 7 7 6 2 3 3 3 2 3 16 4 2 4 2 3 2 3 3 2 2 2 2 1 4 5 4 1 2 2 4 2 3 2 2 4 3 2 2 3 3 5 38 11 4 3 2 2 2 2 2 8 2 3 3 3 2 4 4 3 2 4 2 6 7 5 4 4 3 3 4 3 2 7 7 17 17 17 296 296 317 491 491 184 184 184 412 177 177 177 177 177 177 401 478 66 66 68 68 115 444 444 444 444 360 339 339 53 471 71 342 342 483 440 287 319 319 319 388 348 195 195 90 90 143 401 491 445 445 445 351 351 72 72 351 365 365 365 365 330 94 199 41 41 324 324 143 36 377 87 87 87 164 214 214 214 200 200 192 69 223 130 29 44 44 236 36 310 107 395 351 437 91 91 91 85 139 139 293 122 35 198 45 191 236 131 90 401 82 208 79 288 360 360 360 434 339 248 248 212 445 180 171 171 171 252 215 8 100 100 497 497 497 269 342 68 68 115 273 231 231 231 203 94 58 58 268 268 315 268 450 450 98 98 229 82 247 312 312 126 292 292 292 292 23 23 23 23 408 408 
408 391 391 491 491 491 289 289 127 114 0 0 313 186 447 196 479 463 463 463 463 29 29 382 245 8 354 137 137 137 137 116 250 250 250 276 174 174 174 319 319 348 466 466 212 127 114 264 264 264 264 59 59 452 245 349 205 155 155 332 332 332 332 372 245 245 217 217 473 65 486 486 460 460 169 164 164 164 219 219 485 485 132 88 88 89 89 446 33 250 70 65 65 329 495 42 147 380 288 139 139 175 175 423 423 423 423 423 355 245 43 345 347 347 347 245 416 32 239 208 79 380 499 84 496 496 274 274 413 94 479 230 230 230 230 215 35 401 491 354 345 409 409 409 409 466 466 466 22 283 455 116 10 398 398 398 398 132 132 58 58 72 72 268 268 268 268 268 169 169 39 54 142 397 397 141 141 281 54 9 221 336 336 354 180 139 139 139 375 375 274 122 122 227 227 419 439 439 439 417 237 237 237 237 237 47 47 491 47 316 316 491 316 73 491 289 435 188 118 118 118 118 118 402 198 198 0 0 0 0 464 464 464 463 463 463 463 280 29 382 382 245 245 43 364 276 347 347 347 498 498 396 396 313 313 24 36 310 107 107 395 395 106 153 153 387 122 122 161 161 487 334 275 275 116 117 48 229 229 247 126 126 126 326 326 326 326 326 101 101 149 149 228 491 320 320 345 407 407 407 143 107 395 356 257 257 281 9 142 72 437 306 306 306 306 396 313 186 36 377 87 87 87 8 354 425 251 251 241 444 444 444 444 246 246 173 402 402 397 409 409 409 116 250 250 276 174 174 174 319 348 466 250 241 367 367 367 367 35 458 270 270 342 224 415 415 415 457 259 127 114 57 57 203 203 381 48 48 13 13 491 247 312 126 126 292 292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 260 260 260 260 260 391 391 391 391 491 491 289 412 287 287 111 111 111 111 111 438 378 378 364 389 389 389 389 389 314 242 242 242 33 76 76 465 259 354 420 420 420 324 246 3 464 340 340 340 116 466 466 466 114 92 92 92 92 285 34 106 106 153 372 372 396 245 349 349 352 25 242 242 116 33 471 49 49 9 482 338 338 338 395 485 374 374 374 132 132 318 318 49 269 9 142 393 155 332 332 332 332 332 467 467 475 475 475 475 475 475 475 475 422 349 164 214 214 214 214 214 200 200 117 404 404 404 225 225 225 225 225 193 193 +103-1240-0057 103 342 1 24 8 9 5 33 8 29 19 33 20 18 19 24 1 12 4 33 31 38 5 33 1 31 27 31 13 11 24 19 31 19 40 30 15 10 5 23 33 5 12 5 38 8 23 11 30 27 40 9 35 32 5 40 7 33 5 37 12 5 16 5 23 25 5 31 5 37 18 14 18 3 30 33 1 9 6 12 2 2 3 5 7 2 2 6 2 2 8 6 3 4 3 4 3 3 8 46 8 7 7 4 3 4 3 3 3 5 2 4 4 2 3 4 2 2 3 5 8 3 4 4 4 6 3 2 5 4 4 5 3 2 2 1 3 5 2 4 2 3 4 2 2 2 3 5 3 4 6 6 17 17 296 363 363 363 225 225 289 7 7 7 70 70 65 65 284 284 284 265 265 265 85 85 146 146 438 8 354 159 159 285 285 111 111 111 111 438 438 143 129 259 74 74 351 278 278 278 325 183 41 41 324 324 3 183 183 57 57 57 57 203 381 381 117 117 417 417 417 417 417 417 80 80 320 127 114 92 92 92 92 240 35 77 342 9 142 397 336 181 181 181 181 181 385 385 36 227 419 439 439 417 417 237 237 237 237 237 237 237 237 237 237 491 362 491 491 362 491 491 362 362 362 362 491 435 211 211 491 369 369 21 21 21 21 21 21 21 21 21 260 408 408 391 391 391 228 491 491 373 66 66 68 115 273 273 344 344 344 16 274 274 186 186 162 232 68 172 115 470 179 443 120 240 240 314 35 196 217 217 473 258 258 258 342 342 224 494 494 494 281 9 142 397 147 147 329 329 329 329 252 143 36 449 395 302 302 302 497 122 36 36 377 123 123 123 216 22 283 455 43 364 364 276 346 346 346 346 265 85 85 85 139 139 293 293 122 122 131 472 133 147 147 380 288 496 496 496 274 368 31 342 86 142 336 336 354 109 496 278 99 99 436 395 50 50 50 50 185 453 342 168 180 113 113 113 113 169 285 449 34 69 223 130 198 22 283 283 455 349 234 234 261 25 424 424 424 497 
497 122 466 81 459 459 271 31 342 224 69 69 130 130 280 156 156 156 156 245 245 72 72 437 306 306 306 306 306 396 396 37 24 227 419 419 427 229 491 247 15 193 193 193 +103-1241-0000 103 798 1 10 4 29 33 14 33 36 1 24 4 34 39 36 22 5 34 9 14 33 1 19 40 31 5 29 30 8 40 11 1 24 4 34 39 36 22 5 34 9 14 33 4 25 11 12 5 31 6 30 5 23 24 13 30 21 3 17 11 22 5 24 16 14 33 5 9 23 20 27 37 14 12 20 15 33 24 8 23 40 33 5 9 30 8 33 30 19 37 14 1 19 33 38 5 40 5 29 30 19 33 20 30 27 11 1 30 5 25 19 26 5 23 6 26 9 19 33 38 20 25 31 25 5 17 16 3 30 24 31 33 13 11 40 38 19 34 25 7 5 25 11 5 17 13 25 1 5 9 19 33 5 37 1 2 16 14 38 35 11 33 19 11 30 8 37 34 30 36 1 10 5 3 3 3 3 6 10 31 6 4 4 2 3 4 5 4 3 5 2 1 4 3 2 3 3 3 8 6 5 71 5 6 3 3 3 6 5 4 3 6 2 3 2 1 2 2 6 2 3 4 3 5 5 5 7 6 3 5 5 2 3 2 1 2 2 3 2 5 6 2 3 2 5 6 4 5 4 4 4 2 2 3 3 4 5 3 3 3 8 23 5 1 2 2 3 3 4 2 2 2 4 5 8 5 3 7 2 3 3 5 3 4 6 4 2 2 5 2 3 5 6 2 5 4 7 3 3 3 3 4 4 4 3 2 4 5 4 7 1 2 1 2 5 8 5 12 6 3 2 2 2 4 2 24 9 8 5 4 3 2 3 3 3 6 2 7 3 7 6 17 17 296 296 317 184 184 184 184 289 320 108 119 351 351 486 460 460 215 96 35 272 300 382 382 313 236 129 75 108 119 351 351 374 374 374 374 132 132 98 98 13 417 417 170 170 170 170 442 491 442 442 312 187 442 12 102 102 12 442 12 12 23 260 260 260 260 260 391 391 316 289 289 289 320 7 217 70 473 486 486 486 460 169 169 35 164 219 485 485 374 132 143 129 321 144 27 351 329 329 329 169 352 164 221 221 321 354 29 382 382 396 313 24 131 483 226 321 188 356 356 31 162 232 172 224 494 494 494 129 74 190 190 499 499 499 265 265 265 85 85 299 299 185 185 433 433 86 238 6 419 439 56 56 237 237 237 237 237 28 28 28 491 28 28 362 362 491 362 491 305 362 362 362 362 491 491 218 40 362 218 218 218 218 491 218 491 218 218 218 218 218 218 218 218 491 491 218 218 218 491 491 369 369 491 369 369 369 369 369 369 369 21 21 21 21 21 101 101 101 149 391 228 321 321 320 7 217 217 473 486 486 486 460 460 169 169 164 164 485 485 485 374 132 143 129 458 144 27 27 351 329 329 151 169 169 164 352 221 221 321 354 29 334 334 59 59 313 24 131 483 440 89 55 446 322 466 22 5 455 38 162 482 482 172 115 273 106 499 372 406 406 467 302 302 497 497 497 497 399 217 217 473 65 264 264 264 264 468 468 468 467 37 236 314 401 401 310 107 395 180 106 499 405 405 206 206 178 96 96 272 472 472 472 401 321 144 27 27 437 319 319 319 53 53 76 205 155 29 134 134 134 134 8 359 359 474 474 474 3 335 14 411 410 410 410 410 410 173 29 29 313 313 216 22 448 448 448 14 411 411 171 171 171 171 252 252 131 472 196 196 70 70 65 65 265 265 85 85 139 450 293 497 49 54 86 238 272 377 123 236 129 259 190 380 499 499 428 428 146 146 358 457 457 133 42 147 380 380 288 288 173 173 29 334 334 59 59 452 452 263 229 491 312 312 312 312 292 292 292 1 1 1 1 1 21 21 408 408 408 408 149 149 289 321 321 412 177 177 177 177 131 133 141 141 141 281 453 168 44 44 44 129 259 190 190 487 288 278 240 325 34 324 324 301 378 42 147 147 380 84 84 496 496 496 496 274 274 37 24 419 439 225 417 417 80 321 320 7 7 147 147 380 499 319 319 348 94 199 176 176 135 135 135 200 200 199 255 255 255 251 251 241 241 431 235 235 235 235 235 235 200 248 248 212 354 255 236 36 108 397 397 487 360 360 360 360 339 339 33 33 394 478 478 232 68 172 115 196 479 331 331 151 319 151 240 416 314 96 393 393 234 234 234 261 25 106 306 306 306 306 396 203 53 394 478 162 86 86 6 272 470 470 120 120 120 37 37 24 77 270 9 142 397 345 333 333 333 220 220 173 164 164 402 472 196 309 479 331 331 315 315 315 450 88 88 242 116 94 199 255 255 416 416 458 445 445 361 361 361 120 120 282 282 282 388 195 117 117 229 247 
247 126 126 326 326 326 326 101 101 149 149 321 412 287 44 44 44 215 35 354 278 278 325 34 462 462 462 402 402 401 401 321 259 354 106 106 481 481 481 293 293 186 39 342 224 224 494 242 203 217 473 41 324 324 422 422 349 234 234 234 234 234 261 261 25 498 498 498 498 396 245 245 43 43 364 345 109 109 496 496 496 37 24 314 36 108 377 87 87 236 239 161 161 79 499 499 499 265 85 85 146 146 173 402 402 205 205 234 161 161 487 487 487 374 374 374 132 132 132 98 229 247 15 15 193 193 17 +103-1241-0001 103 777 1 6 30 5 18 3 23 27 38 13 30 38 8 23 11 29 23 5 24 40 18 5 26 7 33 12 13 30 16 19 23 24 20 9 23 36 24 1 12 20 13 30 38 5 40 31 38 20 33 38 19 34 12 5 9 30 13 34 5 37 24 13 25 20 1 4 29 5 23 6 30 10 14 11 40 1 5 25 11 12 5 24 13 11 27 40 31 23 27 29 33 5 38 15 19 25 12 5 11 19 31 33 5 25 31 33 5 18 14 8 40 5 25 24 19 31 33 31 5 37 29 14 23 5 25 11 1 29 14 29 5 23 38 8 23 12 5 23 19 33 5 23 9 14 11 40 31 4 26 13 40 19 16 19 33 38 14 12 5 38 5 25 11 15 5 37 31 5 24 14 19 25 6 23 12 20 39 19 30 1 7 5 3 3 6 3 5 7 2 4 4 5 7 3 4 6 2 3 7 5 4 2 5 5 4 3 3 3 6 3 3 3 4 4 3 8 8 29 5 4 7 5 3 1 3 6 3 4 5 2 1 3 2 2 5 3 5 5 1 5 3 2 3 6 4 6 4 2 4 5 3 7 4 3 5 11 6 2 2 1 2 5 3 3 6 3 9 3 3 4 2 4 4 5 2 3 1 2 4 3 5 3 2 4 3 3 2 4 7 8 6 2 7 3 3 5 6 3 2 3 9 9 5 3 2 4 1 4 4 5 2 7 7 8 4 3 2 4 2 2 3 2 4 7 5 2 5 6 6 3 4 2 4 2 6 4 6 3 4 6 3 7 2 6 2 3 7 2 5 2 3 4 5 7 3 2 6 3 11 17 17 17 363 51 51 51 228 321 320 157 157 157 157 372 467 44 44 44 58 72 72 72 437 437 481 481 481 481 175 175 81 84 84 84 16 274 274 274 43 345 345 109 109 264 468 245 245 245 43 364 276 276 346 346 284 265 85 85 85 139 139 293 293 122 122 472 221 129 321 75 74 425 425 386 431 319 319 319 319 203 203 381 381 381 471 185 49 342 342 342 142 72 437 189 189 189 319 189 200 200 180 180 113 113 113 113 167 167 457 401 321 75 127 114 222 222 222 468 245 349 349 234 205 205 261 25 278 139 139 139 293 203 399 70 429 324 324 324 301 32 239 259 354 425 425 241 374 374 374 374 374 132 132 132 381 381 381 381 404 13 13 78 170 170 491 491 491 491 28 491 341 211 341 12 292 292 21 21 21 21 21 21 21 21 408 408 408 408 149 228 321 321 320 7 127 5 448 448 14 14 411 411 264 264 264 264 264 468 468 468 245 43 43 345 141 141 281 162 54 232 482 482 105 397 397 109 109 213 213 213 358 358 36 36 472 397 397 333 333 220 220 314 198 127 22 283 455 236 129 321 354 190 79 380 288 443 443 443 169 169 164 164 164 164 69 69 130 130 402 402 196 217 217 473 432 330 116 94 337 324 324 324 3 3 197 197 226 226 209 209 145 145 486 460 460 215 215 35 29 100 302 497 497 335 14 411 153 153 153 372 372 396 396 36 36 107 107 395 334 334 334 59 37 37 24 471 270 269 433 427 427 247 247 126 126 326 326 326 326 408 149 149 149 321 412 83 55 55 55 322 466 466 22 5 5 455 399 217 217 473 65 443 443 443 240 325 34 84 84 84 496 274 274 186 162 54 482 482 482 482 482 482 26 26 26 241 431 84 496 496 496 215 35 96 96 36 272 255 255 255 43 364 109 109 403 403 403 171 464 464 340 340 116 466 466 22 283 455 236 239 384 371 278 278 278 31 342 86 86 238 6 272 11 11 11 379 379 471 471 49 9 238 6 272 87 87 87 58 72 156 156 255 42 42 147 147 380 499 499 265 265 85 85 146 146 368 368 368 342 342 224 242 242 116 116 33 33 33 90 250 217 217 473 473 278 278 278 31 39 86 86 238 238 401 491 270 270 270 342 168 69 462 462 130 402 221 401 321 321 74 190 190 437 498 498 498 498 498 498 134 16 302 182 302 302 497 175 175 81 89 89 446 446 67 212 131 472 221 401 321 74 190 492 492 498 498 498 215 215 35 259 74 100 100 100 100 375 375 375 375 98 43 7 7 7 276 346 346 346 346 315 85 85 85 139 139 293 293 293 122 35 198 22 5 
5 251 251 241 431 278 278 285 449 302 302 497 497 497 8 8 259 354 29 498 498 498 498 498 396 37 37 314 77 478 232 232 232 172 115 115 273 470 486 486 365 365 365 365 328 200 200 200 248 253 253 253 31 342 342 168 118 118 118 118 280 29 177 177 177 314 131 133 364 364 276 347 347 347 347 498 498 467 313 313 216 216 22 283 283 455 43 43 364 276 174 174 174 174 319 319 348 348 195 195 195 64 212 212 93 93 93 93 93 464 464 69 462 130 402 402 162 232 68 172 115 273 273 319 319 203 53 53 29 29 495 467 467 89 340 116 94 335 14 14 411 411 297 297 297 297 297 182 182 497 122 216 216 22 283 448 219 219 219 219 286 286 286 286 286 286 286 59 59 59 452 452 263 417 417 417 491 491 421 421 491 421 128 128 491 128 491 128 128 193 193 193 17 +103-1241-0002 103 778 1 24 4 34 39 36 19 25 21 28 11 12 5 11 30 8 37 4 16 33 14 18 19 40 27 25 16 4 32 5 25 1 19 22 31 13 29 33 11 14 19 26 12 5 24 27 24 5 25 33 31 38 19 25 18 20 24 13 33 38 19 24 5 25 5 25 11 18 4 11 33 5 25 3 11 33 5 12 5 24 1 16 6 30 19 25 29 30 19 25 31 13 11 38 14 11 8 23 5 25 11 39 36 14 31 5 29 27 40 11 33 19 25 3 11 33 36 6 23 5 25 11 31 5 25 11 30 20 39 36 24 20 33 3 25 12 5 30 27 11 1 38 13 12 14 39 36 25 27 12 5 24 14 25 3 33 1 24 4 34 39 36 11 30 13 11 19 11 6 23 38 19 24 5 25 19 22 31 13 29 33 24 3 30 19 23 5 4 25 11 24 19 31 19 40 30 15 10 5 23 1 20 6 5 3 2 3 3 4 3 6 3 2 1 3 4 6 3 4 3 3 1 2 1 4 6 3 5 5 7 2 7 17 4 3 3 4 1 2 3 2 3 2 2 3 3 4 4 2 1 2 2 2 2 1 5 2 5 3 5 3 3 3 3 3 2 2 1 5 2 2 4 2 5 7 3 4 2 3 2 8 17 6 4 3 3 3 3 2 2 4 3 3 3 2 2 2 7 4 3 2 2 2 5 3 3 2 3 5 5 1 2 2 3 6 2 2 3 8 5 3 2 3 6 2 3 2 2 2 4 3 3 3 3 3 2 2 3 4 7 4 13 4 3 2 2 3 3 4 5 2 2 3 4 3 8 6 39 6 7 3 4 3 3 4 2 2 3 8 9 8 3 3 3 4 4 4 4 4 2 4 3 5 1 2 3 4 3 3 2 4 1 3 4 2 5 3 4 4 4 6 10 17 17 17 296 296 52 52 52 52 52 52 52 52 52 408 101 51 149 149 321 321 7 7 217 473 65 329 329 460 460 169 164 164 485 485 485 485 378 88 121 121 121 116 33 394 239 107 395 470 153 153 387 387 146 146 314 35 259 22 283 455 236 239 161 79 79 499 499 265 85 85 85 146 173 173 280 145 145 460 460 460 169 402 36 272 495 495 467 257 257 257 257 342 168 180 84 350 350 350 350 413 33 394 90 393 234 261 261 25 486 486 486 460 460 169 99 436 436 436 60 298 298 298 275 303 303 117 404 229 491 247 126 126 126 326 326 326 326 326 326 408 408 408 149 228 321 321 412 188 154 154 154 96 96 172 172 273 470 151 151 215 215 96 36 272 161 495 495 467 467 135 135 200 248 466 22 283 455 399 399 70 65 65 84 496 496 203 53 53 291 291 379 379 49 9 142 397 345 409 409 409 409 58 183 451 30 30 30 301 399 217 217 473 443 443 443 240 36 449 472 133 133 364 276 109 278 278 278 399 217 473 136 275 275 116 195 199 89 89 322 67 199 58 110 110 110 110 254 254 314 35 401 75 377 87 87 87 10 10 309 479 331 331 284 284 405 206 206 206 314 314 401 75 108 377 123 123 123 216 114 114 57 57 57 381 381 381 48 48 229 414 491 312 312 126 292 292 23 23 23 23 23 101 260 391 391 228 289 321 321 373 155 155 332 148 148 387 387 372 406 467 467 242 121 203 53 394 76 74 190 190 487 288 330 379 33 394 77 342 342 273 470 443 240 240 133 133 133 345 382 313 285 14 411 284 265 85 85 146 146 175 175 175 81 275 275 275 116 64 212 131 219 152 152 152 378 353 353 353 313 186 54 54 224 494 236 259 74 437 496 496 496 496 274 186 186 323 238 238 6 272 87 87 116 10 479 331 106 284 405 206 206 167 35 75 377 377 123 123 14 14 411 411 411 297 424 297 182 182 293 293 175 175 89 89 446 446 33 394 478 478 232 232 172 172 273 273 319 319 348 64 64 212 161 300 337 41 219 219 219 219 152 152 152 399 217 473 213 213 213 252 449 449 106 125 125 125 125 466 22 283 455 
42 42 147 380 380 84 496 496 496 496 274 274 37 24 404 427 321 247 126 126 326 326 326 101 101 149 149 149 321 321 320 7 345 109 409 181 240 216 300 300 300 219 219 152 152 152 152 10 10 479 331 84 84 84 274 216 216 114 57 203 399 70 157 157 157 157 313 10 479 331 331 307 307 307 307 61 167 167 233 227 227 419 439 417 170 170 170 170 28 491 28 28 491 491 28 362 491 491 40 305 305 305 40 40 40 40 40 40 163 491 491 366 366 366 163 491 316 316 491 435 289 321 321 320 7 217 217 473 65 486 486 486 460 460 169 169 164 164 485 219 219 485 152 152 301 422 239 36 161 79 79 288 288 443 240 325 34 191 191 191 314 131 472 14 14 226 321 321 411 297 297 297 297 297 297 297 297 297 297 293 497 497 497 122 43 364 276 109 109 278 278 399 217 473 136 275 275 275 195 195 195 335 440 440 154 154 154 458 96 96 68 172 273 470 151 151 215 35 96 272 472 472 472 196 70 70 70 65 495 495 380 467 256 139 175 251 241 423 423 423 423 355 89 89 446 116 33 250 250 217 473 258 258 258 342 342 224 494 494 494 31 9 142 142 397 147 147 329 329 329 329 329 143 36 310 107 395 302 302 302 375 98 98 13 229 491 247 312 15 15 15 15 193 193 193 17 +103-1241-0003 103 694 1 18 20 18 4 11 5 25 5 25 22 5 24 16 14 33 5 9 5 23 16 20 23 19 26 1 12 5 33 12 5 24 19 31 33 19 30 20 5 31 22 30 20 10 14 40 38 14 31 20 22 30 19 33 23 20 23 4 16 19 26 4 33 18 19 24 1 18 20 24 15 18 4 37 9 19 25 22 38 8 33 30 8 33 19 25 34 19 26 22 19 26 31 27 1 16 6 30 18 20 38 5 40 5 25 1 3 11 23 35 22 19 26 29 14 31 19 25 19 21 1 38 19 12 5 25 5 25 17 15 25 23 20 16 19 17 39 14 1 4 25 11 23 6 26 1 8 14 25 17 30 15 18 13 30 12 5 33 5 10 33 18 19 40 31 33 36 29 19 26 32 27 23 11 14 40 1 12 4 2 6 1 2 2 3 3 4 5 3 3 2 2 1 2 2 2 3 8 5 2 3 7 11 2 2 1 2 2 2 2 6 3 2 3 3 4 4 5 3 3 5 3 4 2 2 7 3 3 2 3 2 2 4 7 6 5 2 5 3 3 3 2 8 19 4 3 4 3 1 2 2 1 2 3 3 3 5 3 3 4 3 2 3 3 1 4 3 2 5 4 9 14 5 2 2 3 2 2 2 3 2 5 1 9 2 3 2 4 2 4 5 4 5 2 1 4 12 1 3 2 2 2 2 4 5 3 8 4 1 4 9 3 3 2 6 14 5 2 6 1 9 7 1 13 2 7 3 3 8 8 6 4 3 1 8 4 7 2 1 2 3 8 3 4 3 2 4 7 4 3 3 5 6 7 17 17 17 363 363 363 363 363 408 51 51 491 321 451 451 30 30 30 58 72 110 110 110 254 254 254 285 44 44 44 94 199 331 319 319 319 348 394 76 465 144 27 27 437 319 319 319 53 53 77 205 155 29 6 134 134 134 8 354 100 100 100 497 349 349 234 234 234 261 261 25 485 213 485 286 139 139 175 175 81 176 176 328 328 200 200 117 229 321 247 126 126 326 326 326 101 408 149 228 321 321 45 45 45 45 198 22 5 455 399 473 473 494 38 162 232 232 86 238 6 272 485 485 286 468 468 337 337 485 324 459 459 271 31 54 9 142 221 336 259 208 208 190 487 288 213 213 213 252 36 310 107 395 334 334 304 304 49 269 142 397 397 141 141 281 162 232 232 172 115 273 444 444 213 252 143 458 208 79 487 313 236 143 36 26 359 166 166 166 324 301 251 251 251 251 241 241 431 376 376 376 460 460 169 169 352 164 164 25 176 135 328 200 464 464 415 415 415 415 415 36 131 183 57 57 57 57 57 381 381 48 48 417 417 170 170 170 28 491 28 491 341 491 341 341 491 163 491 491 316 316 316 321 435 289 373 451 451 30 30 301 399 217 473 65 476 476 464 464 202 202 202 8 137 137 137 116 90 90 76 208 441 441 346 346 428 428 146 146 24 35 133 133 147 380 499 428 428 428 146 143 449 449 89 340 116 394 212 164 164 214 214 360 360 200 76 465 192 192 176 135 200 200 248 248 478 342 172 115 273 84 84 84 84 274 98 13 229 247 312 126 126 292 326 326 23 23 101 101 101 149 391 228 321 321 155 155 155 148 148 372 58 183 451 30 30 378 378 141 141 141 281 342 168 44 44 44 94 199 335 14 14 411 284 284 284 405 405 206 206 240 314 26 26 241 241 367 367 367 458 321 192 176 135 200 200 248 248 465 259 
74 74 492 492 492 492 271 150 342 342 224 224 242 116 199 459 459 469 469 37 24 75 310 107 107 447 97 225 225 80 321 321 320 345 333 333 220 220 22 44 44 94 199 331 319 319 319 348 348 33 394 394 212 239 445 180 290 290 290 290 434 434 434 339 33 359 359 81 166 324 324 422 422 349 234 234 234 234 261 25 25 278 278 278 416 458 192 300 334 334 355 355 452 263 229 247 247 126 126 326 326 326 326 101 101 149 149 228 321 412 83 55 55 446 322 67 33 33 250 251 251 241 241 431 235 235 235 235 235 235 235 235 235 348 200 200 200 248 335 14 14 14 226 209 287 265 265 265 85 85 146 146 146 299 242 242 339 195 195 248 248 212 239 208 79 79 288 288 403 403 403 171 171 171 3 58 58 72 72 72 72 110 264 264 264 264 264 264 264 468 468 59 313 216 216 127 45 45 45 236 129 75 108 119 351 351 151 151 151 169 178 36 310 447 447 447 6 272 257 257 257 257 257 99 338 338 338 447 482 238 238 336 321 75 371 374 374 374 374 215 129 354 176 176 176 328 200 200 248 248 186 338 338 338 395 470 106 424 424 424 424 497 122 122 131 300 334 334 304 304 185 185 269 9 427 229 247 247 15 15 193 193 193 17 +103-1241-0004 103 737 1 5 25 11 5 16 35 23 31 6 16 33 9 30 7 25 9 19 30 11 1 38 19 10 18 20 18 4 11 38 6 30 25 13 37 14 31 19 25 31 18 20 38 5 40 33 38 13 25 33 20 1 19 25 16 4 22 33 1 18 20 18 4 11 23 35 22 33 4 33 38 13 25 33 20 1 37 13 30 20 24 5 10 13 40 18 20 23 35 22 33 4 33 31 19 22 31 33 20 1 23 4 22 19 26 5 23 19 33 5 23 5 37 12 5 17 30 15 25 5 31 1 18 38 13 25 18 20 30 20 10 33 9 30 8 33 30 19 37 14 1 12 13 30 38 5 40 25 27 31 8 25 5 37 13 25 20 33 30 15 25 1 8 4 2 2 3 9 11 4 9 6 5 4 4 2 9 7 3 7 5 7 2 3 3 3 2 1 2 1 5 3 3 3 2 2 4 2 4 2 3 2 1 2 2 1 4 3 2 2 3 3 9 40 5 4 4 9 3 4 17 5 3 2 3 3 4 3 4 2 3 7 2 2 3 3 10 5 6 3 2 4 4 3 4 2 2 2 3 2 3 3 2 2 3 4 2 3 3 3 8 14 7 4 4 3 4 2 3 2 2 3 2 2 3 1 2 3 3 4 3 5 13 72 5 2 2 2 3 3 4 4 4 5 2 3 5 6 3 3 3 9 6 4 2 3 3 2 5 4 6 7 6 3 2 3 2 3 3 6 2 5 9 4 17 17 17 363 363 51 51 228 321 412 55 55 322 67 212 34 44 44 462 349 234 234 234 234 234 261 261 25 424 424 424 424 424 182 182 182 497 497 497 497 497 497 186 186 162 482 482 482 482 115 115 273 106 405 405 405 405 206 169 349 352 352 402 6 272 472 221 336 336 354 190 380 288 315 315 315 315 450 450 450 413 413 348 33 33 33 394 394 32 239 75 354 485 213 286 286 286 286 286 286 334 59 59 37 37 24 131 404 225 225 225 225 225 320 345 407 407 407 407 36 107 107 400 30 30 464 254 254 254 314 133 133 364 276 276 153 153 153 387 372 396 388 94 199 145 463 463 173 280 29 313 186 162 342 342 224 494 379 379 379 77 342 224 30 30 378 345 141 141 281 31 9 6 272 119 397 441 109 432 330 348 64 64 76 449 41 41 41 41 19 19 454 13 417 170 170 170 491 28 28 28 28 491 491 2 2 2 491 362 362 102 362 362 362 491 362 362 491 305 305 366 366 366 366 366 435 435 316 316 435 435 435 289 321 321 321 209 188 340 340 33 33 90 349 205 234 261 25 470 486 376 376 376 376 460 460 178 178 233 96 75 419 427 321 247 126 126 126 326 326 326 326 326 326 326 101 408 408 408 149 321 321 373 451 451 30 30 30 58 58 110 254 254 254 254 314 26 26 251 241 367 367 367 367 367 35 96 321 75 34 415 415 415 385 314 259 108 119 397 441 441 432 330 330 379 64 76 36 449 41 41 41 41 19 19 454 454 454 414 47 47 80 80 80 321 321 7 7 32 280 104 104 104 104 104 337 337 337 324 301 399 217 383 383 383 383 383 383 310 107 34 253 253 253 342 224 30 30 30 301 26 241 367 367 367 35 96 321 272 34 415 415 143 478 478 68 68 115 273 278 278 178 96 96 86 86 238 6 272 41 41 41 19 19 454 417 417 417 47 47 47 47 491 47 47 47 80 249 435 435 321 321 321 7 7 251 241 241 431 376 376 460 178 178 458 192 192 
176 176 135 200 200 200 464 255 251 251 241 431 278 278 278 26 26 302 302 175 175 69 223 130 130 198 22 283 455 416 144 208 79 380 288 288 290 290 434 339 339 199 459 459 459 271 271 39 39 433 433 160 160 112 112 56 56 56 56 28 28 491 28 28 28 28 362 362 491 362 362 362 362 362 362 491 491 362 362 491 362 218 40 40 211 369 369 369 369 369 369 369 369 369 21 21 21 21 21 21 21 21 21 21 21 21 21 260 260 260 260 260 260 260 260 260 260 260 260 260 260 163 163 163 366 491 316 316 316 491 316 73 73 289 321 321 7 409 409 409 409 94 58 183 451 30 30 301 378 42 147 147 288 213 213 213 252 36 107 447 447 6 472 472 221 336 321 354 190 380 499 428 428 428 146 146 143 35 472 133 133 147 147 147 288 288 278 173 173 280 29 334 334 355 355 452 452 263 263 417 417 417 442 80 80 435 321 435 127 127 0 222 222 245 378 8 345 141 141 281 281 9 9 238 196 309 479 331 231 231 231 231 231 274 274 186 162 232 172 172 115 273 480 480 480 85 85 299 299 94 199 69 223 130 280 34 475 475 475 475 475 475 324 422 36 310 161 161 161 487 487 288 290 290 290 434 434 339 303 303 48 48 48 417 417 417 417 193 193 17 +103-1241-0005 103 701 1 18 20 34 6 33 18 20 38 5 40 33 36 14 23 20 31 27 18 20 33 8 11 18 19 40 18 6 30 31 19 25 12 20 39 3 30 11 5 37 12 5 31 24 6 23 1 9 30 8 33 30 19 37 14 18 27 33 13 23 1 5 25 11 38 13 25 33 27 37 14 33 19 12 5 31 33 15 32 5 25 18 7 31 1 12 5 23 6 26 29 23 4 33 16 6 30 24 38 5 40 1 6 23 24 27 31 33 11 19 40 14 33 19 11 1 12 20 27 25 23 20 23 19 37 19 26 22 30 20 10 14 19 25 31 8 33 1 9 20 19 26 5 17 14 23 18 36 38 5 40 31 19 33 19 26 6 25 5 29 8 23 5 37 32 19 26 17 5 23 40 4 33 12 20 13 22 31 33 30 20 24 13 25 11 1 11 6 3 4 4 2 2 2 2 1 4 5 6 6 3 5 5 2 5 3 6 6 2 2 2 4 2 3 3 5 1 2 2 1 4 6 2 2 2 1 2 3 6 4 7 5 1 3 3 5 6 2 2 3 2 4 5 6 4 8 15 6 1 2 3 2 3 3 5 2 3 3 3 1 3 3 3 4 6 1 2 4 7 11 21 2 3 4 6 7 4 3 4 3 5 2 3 2 2 2 4 1 5 3 3 3 2 2 2 2 5 5 2 3 5 8 3 4 7 2 2 4 3 3 2 2 4 3 3 3 5 3 2 3 7 7 4 1 2 7 1 4 3 5 7 6 3 3 3 2 2 4 3 2 5 3 3 2 3 6 7 2 2 3 6 2 5 2 2 3 3 2 2 3 2 2 5 2 4 3 6 4 5 5 6 13 17 17 296 296 296 317 491 491 184 184 184 321 435 435 451 451 30 30 30 422 164 164 106 106 106 153 387 387 285 449 451 30 301 378 378 141 141 281 281 9 238 6 108 377 344 344 374 132 132 88 147 109 498 498 498 498 134 134 175 81 474 41 41 19 19 186 162 68 172 115 273 84 496 274 58 451 451 30 30 422 422 36 108 119 119 437 265 265 265 85 85 146 146 24 131 257 257 257 257 31 54 9 142 397 441 153 153 153 372 396 313 186 342 342 224 340 340 340 466 22 283 448 448 219 219 464 180 180 306 306 306 306 396 396 24 285 69 223 130 198 22 283 455 38 162 232 482 482 482 105 196 70 70 65 65 481 481 481 481 182 293 293 497 122 129 401 321 321 190 190 380 380 499 428 428 428 146 146 385 36 472 133 42 147 147 288 278 278 173 280 29 29 245 58 58 72 496 496 496 274 274 143 75 108 119 119 351 351 256 139 139 139 139 139 375 375 375 98 13 321 247 126 126 126 326 326 326 326 326 326 326 101 149 149 228 321 412 83 55 55 322 67 67 133 364 109 109 189 330 330 348 64 36 34 180 410 410 410 410 410 280 29 29 313 236 36 377 377 123 123 216 22 283 283 38 162 232 68 238 272 470 470 171 171 171 99 99 436 436 60 298 116 33 199 58 58 268 268 268 268 268 268 268 169 169 186 39 323 390 390 18 18 112 112 56 491 56 312 491 491 312 491 12 12 12 12 21 23 23 260 260 260 391 149 228 491 321 321 127 5 5 455 251 251 241 431 235 235 235 235 235 235 235 235 348 200 248 248 90 465 259 74 425 386 386 431 486 486 460 240 35 35 393 393 155 155 148 148 148 148 387 387 203 53 250 345 141 141 141 281 9 483 14 226 226 209 411 297 297 297 297 203 53 53 65 496 496 368 31 
54 6 6 272 490 490 490 368 453 9 168 498 498 498 498 396 313 325 449 191 191 191 37 24 24 404 13 417 47 47 491 491 321 80 321 321 435 5 448 448 14 14 411 411 350 350 350 350 350 350 466 81 166 166 324 301 301 251 251 241 431 278 278 173 280 176 176 328 200 248 394 465 208 208 487 487 213 213 213 422 36 310 107 395 334 495 406 340 340 340 340 33 394 478 66 68 68 115 273 273 265 265 265 428 85 146 358 24 36 472 472 336 259 354 420 420 420 360 360 135 135 135 135 200 200 44 44 44 44 416 321 144 208 498 498 498 498 498 498 134 302 302 375 375 293 497 98 225 489 489 489 489 378 43 345 141 141 281 162 232 68 68 115 115 470 278 278 325 325 176 176 176 135 135 200 200 199 125 125 125 125 199 44 44 44 129 259 321 74 437 437 265 85 85 85 85 85 139 175 175 81 462 462 130 402 99 338 338 338 338 395 395 360 360 200 200 200 248 212 302 302 302 302 497 185 49 342 168 415 415 415 198 22 448 448 448 448 154 154 154 458 321 96 96 482 482 447 238 6 161 487 487 288 360 360 360 434 434 301 399 217 473 65 432 432 432 120 330 388 388 303 303 303 243 243 131 419 427 491 229 247 15 15 15 15 15 193 193 193 193 193 17 +103-1241-0006 103 761 1 24 4 34 39 36 9 13 30 23 20 25 27 33 19 26 12 5 33 19 33 38 5 40 5 17 14 23 1 31 8 11 5 23 11 29 4 31 33 18 14 13 40 22 38 19 22 23 20 13 40 29 3 31 5 9 5 23 38 19 12 7 33 23 35 22 19 26 4 33 18 14 1 18 4 11 18 20 23 35 22 33 1 18 20 22 35 11 18 3 30 11 23 20 18 4 37 16 15 23 11 33 5 25 27 33 5 31 12 5 33 13 25 31 30 19 21 19 11 5 33 20 4 25 11 13 22 31 29 13 22 33 15 32 5 25 5 37 18 14 4 33 5 33 36 11 4 25 11 19 22 31 29 30 13 32 5 25 1 32 20 38 5 40 31 19 33 19 26 12 13 30 38 15 33 19 26 16 14 31 5 24 34 19 26 6 30 31 5 24 9 3 11 20 1 15 5 6 4 4 5 4 3 5 3 2 5 2 10 2 4 1 2 1 2 4 6 3 3 3 5 6 7 3 9 7 2 2 3 4 5 4 4 2 3 2 3 6 3 3 2 4 2 4 2 4 3 4 4 2 2 2 1 3 2 2 6 2 3 2 4 2 4 3 2 4 6 26 6 3 2 4 3 4 4 5 4 14 6 2 4 1 2 6 3 3 3 2 1 2 1 2 6 4 4 1 3 2 6 4 3 3 6 2 2 8 2 5 6 2 2 6 2 2 3 4 8 1 2 2 3 4 3 2 2 5 4 4 6 1 2 2 2 3 5 6 3 2 6 4 3 2 2 2 2 3 3 2 2 2 6 3 6 26 7 4 4 1 5 3 2 2 4 5 2 3 6 4 5 3 2 5 4 3 8 2 5 4 3 4 7 3 8 2 4 2 2 2 9 7 17 17 17 296 363 363 363 363 363 101 51 51 51 321 321 320 7 217 473 65 329 329 329 460 460 329 329 164 164 485 485 219 485 485 477 374 132 132 132 32 321 321 354 180 264 264 264 468 468 313 134 175 359 166 166 166 301 10 479 331 84 84 496 496 285 459 459 459 31 342 342 224 176 176 328 200 200 248 212 45 45 45 325 177 177 177 177 43 43 364 364 276 346 346 141 141 141 368 453 342 168 44 44 416 416 321 144 27 498 498 498 498 467 302 302 375 375 98 98 98 13 225 321 435 225 7 373 66 66 68 68 172 115 273 265 265 265 85 85 146 146 285 285 302 302 302 497 122 122 472 472 259 74 74 437 437 311 311 311 460 169 150 39 86 238 6 272 300 334 334 406 406 467 356 281 281 9 9 142 221 336 321 208 208 441 109 278 278 178 143 458 192 26 359 359 474 166 464 464 253 253 253 342 142 221 321 74 437 437 405 405 206 150 342 342 224 494 134 8 100 100 100 100 497 345 333 333 220 216 180 113 113 113 113 167 167 457 251 251 241 367 367 367 367 458 192 192 135 135 200 200 464 415 415 415 415 415 285 156 156 156 156 59 59 452 263 13 229 491 247 312 312 312 292 292 292 292 292 1 21 21 21 21 21 21 21 260 408 408 408 149 149 321 321 321 373 72 110 430 430 430 430 430 430 325 183 183 451 30 30 301 301 251 251 241 367 367 367 367 367 367 233 96 96 6 227 419 427 56 170 442 442 201 201 201 201 201 201 491 47 491 491 491 435 435 435 320 451 451 30 30 422 458 144 389 389 389 131 58 72 72 72 437 306 306 306 306 306 396 313 285 26 359 166 166 166 166 464 202 202 349 205 234 234 261 261 25 470 443 139 
139 139 293 497 122 35 75 377 87 87 87 116 10 10 479 331 84 84 496 496 274 285 325 459 459 459 271 31 342 342 86 198 198 127 283 5 236 129 129 321 108 119 119 351 432 432 330 330 33 195 471 77 269 238 272 447 397 336 147 456 456 236 236 239 310 107 395 278 278 278 325 34 469 469 236 36 108 449 41 41 41 324 324 246 3 464 89 89 446 446 212 131 145 443 178 178 458 96 96 86 105 105 336 470 470 151 178 143 96 401 321 75 108 119 418 418 418 418 418 186 99 436 436 60 60 298 116 199 69 223 130 402 402 156 156 156 245 14 14 411 411 145 145 145 460 460 240 325 34 469 469 143 321 108 449 485 485 485 374 132 132 325 325 34 89 446 446 67 131 34 154 154 154 96 96 54 142 105 336 190 380 288 151 151 169 99 436 436 60 60 298 298 298 303 303 303 48 404 229 491 491 312 312 312 292 292 292 292 292 292 21 21 21 21 21 21 21 21 408 408 408 408 391 491 491 321 373 373 338 338 400 400 400 30 301 378 378 345 141 141 141 281 162 162 232 68 68 115 470 278 278 278 325 176 176 176 135 135 200 248 248 212 127 114 264 264 264 264 468 245 245 43 43 364 364 109 109 109 171 171 171 252 449 449 176 176 135 328 200 200 248 248 393 155 155 332 332 332 186 162 232 68 68 68 115 273 231 231 231 231 53 53 394 76 164 164 214 214 214 214 214 328 200 200 117 335 14 209 157 157 157 157 157 313 186 186 162 232 68 68 68 115 273 231 231 231 53 53 212 212 65 493 493 240 325 41 41 41 19 19 19 454 229 491 247 15 15 193 193 193 17 +103-1241-0007 103 757 1 4 25 11 1 31 19 25 31 19 33 19 26 4 25 11 38 15 33 19 26 38 5 40 12 20 27 25 23 20 34 19 26 33 19 11 36 21 5 31 33 12 13 25 1 32 20 31 4 33 1 4 25 11 38 15 33 19 11 38 19 34 1 6 23 18 14 24 8 33 5 25 11 24 15 25 1 24 4 34 39 36 19 25 22 7 25 33 14 11 12 5 31 33 15 32 5 25 24 4 31 33 14 23 3 22 19 26 5 29 12 5 33 19 22 5 33 6 16 5 31 29 30 13 29 30 5 33 6 30 20 33 19 17 27 19 26 18 27 24 16 14 31 5 29 14 1 5 25 11 4 31 33 19 24 19 16 12 5 16 8 37 34 14 11 20 33 30 15 25 38 35 11 1 31 36 25 9 20 5 23 6 26 1 10 12 3 5 1 8 3 3 7 3 2 4 4 1 2 2 3 4 3 3 3 2 2 2 3 4 6 1 3 3 4 2 10 3 2 4 8 3 5 5 1 2 6 6 11 6 4 7 6 4 1 2 3 3 4 4 3 2 5 2 2 4 3 9 4 6 2 6 5 4 2 1 5 2 7 9 42 6 4 4 2 4 2 3 4 6 2 3 2 2 1 3 3 3 3 6 2 2 3 5 3 4 2 6 3 4 2 5 3 4 2 2 5 2 3 2 2 5 6 3 3 4 2 3 2 2 3 3 3 3 3 2 2 5 4 5 3 4 5 4 3 3 5 2 5 7 16 6 1 3 7 3 3 2 3 2 2 2 4 5 7 2 5 3 3 3 6 2 5 3 2 2 5 4 7 6 2 2 2 4 3 6 8 20 17 17 17 363 51 51 228 184 321 321 209 83 194 194 194 194 194 194 194 194 194 282 388 195 195 212 212 131 483 197 197 197 197 66 66 68 68 115 273 494 278 330 379 33 394 478 478 68 68 68 115 115 470 278 278 325 449 176 176 176 328 328 200 200 464 89 446 116 131 133 364 109 109 403 171 171 171 252 449 449 176 176 328 200 200 250 345 141 141 281 281 9 198 22 448 448 448 14 411 350 350 350 350 350 350 348 359 81 324 324 324 422 164 164 164 214 214 214 214 214 200 200 200 195 248 248 394 76 75 377 87 87 87 236 321 384 371 374 374 374 374 132 132 236 36 310 107 395 395 151 151 151 169 150 39 86 86 238 6 127 114 361 361 361 361 361 388 388 303 117 48 229 321 247 126 126 292 292 408 408 408 408 391 321 321 373 373 400 400 400 400 30 422 422 162 232 68 68 115 273 470 486 486 486 460 460 169 169 36 227 483 226 440 89 446 322 67 394 133 364 321 364 109 109 171 171 171 252 252 449 191 191 191 191 131 133 133 321 345 333 220 220 220 164 483 14 226 321 321 209 411 297 297 297 297 297 297 297 293 497 175 81 58 58 156 156 156 156 156 245 399 217 217 217 70 65 65 428 428 146 146 358 449 449 449 242 116 116 33 33 217 217 217 217 473 290 290 290 290 290 434 434 434 339 303 303 48 48 48 417 491 170 170 28 28 28 28 491 362 362 362 491 491 362 
362 491 362 491 491 211 211 491 341 369 369 369 369 21 21 21 21 21 21 21 21 21 101 101 101 149 149 228 321 321 7 217 473 473 329 329 329 460 169 169 164 164 219 219 485 485 378 88 88 242 446 348 90 90 465 445 445 351 351 486 315 319 450 413 413 76 449 449 300 191 313 314 198 22 283 455 38 232 232 238 6 272 470 171 171 171 252 99 436 436 60 60 298 116 33 250 217 473 65 486 486 460 460 169 150 54 238 6 272 300 334 382 313 251 251 251 241 431 405 405 405 206 178 458 192 176 135 135 200 200 200 199 230 230 230 230 215 35 96 198 22 283 455 236 129 321 108 119 351 278 278 143 458 192 192 277 385 325 180 106 405 405 405 206 169 352 352 25 459 459 271 271 31 9 142 221 321 259 190 488 488 488 488 215 35 29 29 382 313 236 36 36 119 351 153 153 153 372 467 337 337 301 236 108 377 123 123 416 458 144 180 180 84 84 496 88 88 176 176 135 328 200 200 248 58 72 437 437 350 350 350 350 203 53 381 394 394 155 155 332 332 332 186 162 342 115 273 273 151 151 215 215 354 29 334 334 334 59 452 452 229 321 247 126 126 126 326 326 326 326 326 326 326 326 101 149 149 149 228 321 83 55 55 322 67 212 34 145 145 486 376 460 460 169 150 150 86 238 6 272 57 57 57 203 473 118 118 118 118 402 198 22 5 5 455 349 234 234 261 25 106 265 265 265 85 85 146 438 173 349 234 393 198 164 470 498 498 313 285 325 41 324 324 422 36 310 161 161 487 487 288 290 290 290 290 434 434 339 250 250 345 389 389 389 314 131 472 401 401 401 80 321 80 321 478 66 68 482 115 273 374 374 374 132 413 33 250 212 354 420 420 420 464 464 255 255 251 251 241 431 235 235 235 235 235 235 413 303 303 303 48 48 48 417 417 170 170 421 421 491 421 421 491 491 128 128 491 128 128 128 193 193 17 +103-1241-0008 103 756 1 12 5 16 8 37 34 14 11 20 33 30 15 25 18 4 40 9 19 25 19 25 4 25 11 17 6 25 18 4 16 5 25 7 30 5 17 27 1 4 25 31 14 11 12 4 33 9 30 19 31 22 5 16 19 32 5 23 1 9 5 33 12 13 30 38 5 40 5 29 4 31 5 25 21 14 11 30 3 29 33 6 16 14 39 36 1 5 23 19 33 5 23 17 14 23 1 32 20 40 31 19 33 19 26 7 33 12 13 30 3 25 12 5 32 19 26 17 5 23 40 1 8 4 31 33 18 14 33 5 17 27 19 25 33 5 12 5 23 15 11 20 40 38 15 33 19 26 30 36 24 9 5 33 32 20 19 25 16 6 30 24 11 24 20 17 30 15 37 23 20 1 12 4 33 32 20 29 30 5 16 14 11 33 19 31 33 15 7 33 31 8 11 1 23 2 3 4 7 3 4 3 2 3 5 2 4 2 2 2 4 3 3 3 11 4 4 3 3 3 9 4 5 5 4 2 2 6 5 2 3 10 7 8 3 4 2 3 2 4 4 4 2 2 4 3 2 4 2 4 4 6 23 3 5 3 1 2 3 3 2 3 2 4 4 5 2 3 2 3 5 2 4 3 2 4 6 2 5 9 10 6 3 1 2 1 2 4 6 9 14 5 3 3 3 2 2 4 5 4 3 2 2 3 3 3 1 2 5 2 4 2 2 3 6 20 7 7 4 2 2 2 2 2 4 6 3 3 3 1 2 3 4 4 2 4 4 3 3 3 2 4 4 2 3 3 4 2 5 3 2 4 4 2 3 3 1 2 4 4 3 5 4 2 8 13 2 3 2 3 3 2 2 3 3 3 2 2 2 2 3 2 6 4 2 8 6 5 17 17 17 296 296 296 317 52 52 52 52 52 52 52 52 52 51 51 51 184 184 321 321 320 127 5 5 38 349 261 261 25 106 265 265 85 85 146 438 173 349 221 401 127 114 498 498 498 313 325 34 324 324 422 236 36 161 161 487 487 288 290 290 290 434 339 199 58 254 254 71 281 342 142 221 336 354 137 137 137 137 116 335 335 14 14 411 188 188 340 340 340 340 330 330 388 94 199 199 89 89 89 446 446 67 64 131 472 472 458 144 180 106 426 426 426 426 426 426 282 282 388 195 117 404 404 225 225 72 110 486 486 486 460 460 169 352 352 29 44 44 94 199 145 145 315 315 315 468 468 406 467 467 467 469 416 416 192 180 84 84 84 84 375 98 98 13 417 417 47 491 47 80 80 321 321 320 83 83 145 145 365 365 330 330 379 77 77 224 179 179 179 313 314 198 164 127 114 92 92 92 92 167 457 401 401 401 321 354 190 380 288 278 278 31 342 86 105 105 458 192 255 255 349 205 261 25 278 278 278 99 436 436 395 302 302 302 375 98 98 13 229 321 247 312 312 292 1 1 292 326 1 326 23 23 23 
23 101 101 101 149 391 491 289 289 320 7 354 159 159 159 159 159 167 35 35 198 127 0 0 222 468 245 378 345 141 141 281 453 453 44 44 44 44 259 74 437 311 311 311 311 150 150 342 224 494 494 469 116 64 212 310 300 334 382 313 314 314 239 161 161 380 499 405 405 206 215 215 96 96 272 34 106 405 405 206 169 349 352 234 155 332 332 332 313 219 219 477 477 477 477 132 132 98 98 417 417 417 417 47 47 491 491 47 80 80 80 321 320 412 287 44 44 44 175 81 431 278 278 285 302 497 497 122 416 144 180 498 498 498 498 498 499 302 375 375 98 98 263 13 417 417 417 417 170 491 491 47 491 491 491 47 47 435 435 321 321 373 310 400 400 400 30 422 422 162 232 68 172 115 470 278 278 325 176 176 135 135 200 200 464 180 113 113 113 113 167 167 35 35 127 114 264 264 468 406 467 467 125 125 125 348 466 22 283 455 99 338 338 395 395 360 360 360 200 200 248 212 302 302 302 302 375 375 185 269 433 112 427 491 247 312 312 126 292 292 292 23 23 23 23 23 101 101 101 149 149 149 289 321 412 287 287 287 111 111 111 438 145 376 376 460 460 169 150 86 86 238 6 272 272 156 382 313 325 34 87 87 416 144 27 180 84 84 496 88 88 88 340 340 340 116 33 394 465 377 123 123 198 22 283 283 455 251 251 241 431 171 171 171 252 325 41 41 41 318 318 49 9 142 397 364 109 109 403 171 171 252 449 449 176 135 328 200 248 248 345 380 288 288 496 203 203 53 53 212 354 159 159 159 159 167 167 310 107 338 400 400 400 30 324 464 121 121 33 394 90 393 155 155 25 148 148 387 387 387 203 53 53 53 64 10 429 429 429 301 416 32 321 208 79 79 380 288 288 171 171 171 252 173 173 402 26 359 474 474 474 474 19 19 19 229 321 247 126 126 326 326 326 326 101 149 149 228 321 321 412 45 45 45 169 143 310 107 400 400 30 301 422 129 74 492 492 245 349 349 261 25 498 498 498 313 313 36 377 87 87 38 54 86 6 272 470 171 464 464 464 113 113 113 113 167 77 478 172 172 273 265 265 265 265 85 85 299 299 24 131 419 439 439 417 417 237 128 193 17 +103-1241-0009 103 694 1 32 20 40 5 22 15 31 8 32 35 11 31 15 1 8 24 25 3 33 19 22 31 29 13 22 33 19 26 5 17 14 23 1 31 13 11 24 4 34 39 36 9 23 4 26 22 23 20 1 19 33 31 5 9 28 8 37 22 5 24 16 6 30 1 18 20 32 35 11 9 20 18 20 30 1 24 19 31 19 40 4 23 5 17 40 4 25 11 14 31 29 13 25 31 14 38 5 40 33 19 9 30 19 26 18 19 24 27 37 14 16 14 24 25 27 37 5 31 22 27 32 5 16 6 30 24 20 1 12 5 31 33 15 32 5 25 24 4 31 33 14 38 19 31 5 23 11 1 10 7 4 4 4 4 8 5 4 4 2 3 2 12 53 9 1 3 2 3 2 3 2 3 2 3 3 1 3 3 4 6 8 14 7 2 3 2 6 2 3 2 4 3 3 3 3 3 6 24 4 4 2 2 8 11 4 4 4 2 6 5 3 8 13 5 2 5 2 2 3 6 4 3 7 19 3 2 3 2 4 6 3 3 3 4 4 2 2 4 6 3 2 4 5 6 2 2 4 2 2 4 2 3 2 3 2 4 3 4 3 4 2 1 5 3 2 2 5 3 4 5 3 5 3 2 4 8 27 3 3 4 3 3 5 3 2 3 6 3 3 4 4 2 4 3 3 6 11 17 17 17 363 363 363 51 51 184 184 321 373 373 338 400 400 400 400 213 356 356 368 453 342 168 44 44 44 44 458 144 445 351 343 343 343 343 171 358 358 358 39 342 224 168 111 111 111 111 438 186 99 395 395 389 389 236 36 478 224 273 470 403 403 403 207 207 207 454 263 48 417 417 417 417 417 237 237 442 28 28 28 491 28 442 442 442 442 362 362 362 362 362 362 362 362 362 362 305 218 218 218 218 366 218 491 491 366 366 366 366 366 366 491 366 366 366 366 316 316 316 316 435 321 289 321 209 287 287 111 111 111 111 438 438 203 10 479 307 307 307 61 285 34 154 154 458 96 342 86 105 336 470 470 151 151 178 178 96 36 449 176 135 135 200 464 44 44 44 416 321 208 79 498 498 498 498 499 302 375 98 98 98 13 13 414 170 170 442 47 47 47 491 47 47 47 491 316 316 80 80 321 289 66 66 66 179 179 179 179 314 314 196 217 473 486 486 329 460 169 169 352 183 485 485 485 374 301 129 321 259 425 425 386 431 376 365 365 299 76 
76 465 26 26 359 359 474 474 474 19 19 229 321 247 126 126 126 126 326 326 326 326 101 408 408 408 391 391 391 391 316 316 80 80 289 321 321 321 188 177 177 177 356 356 356 342 168 44 44 44 8 32 32 32 321 354 354 153 153 153 153 387 387 387 146 464 464 111 111 111 146 438 202 402 402 402 75 144 27 437 319 319 203 53 53 394 90 393 234 205 155 332 148 148 148 372 372 372 59 452 263 263 78 414 47 47 47 47 47 491 47 47 80 491 321 289 289 373 451 451 451 30 30 30 99 338 338 389 389 389 389 314 32 239 354 420 420 420 213 213 252 422 183 183 451 286 286 286 286 334 59 59 59 452 263 321 247 247 126 126 292 292 326 326 326 326 326 326 101 101 101 149 228 289 289 320 217 473 258 258 31 342 224 494 494 368 453 342 483 14 145 145 284 329 329 175 175 81 81 469 416 458 458 96 342 68 115 273 365 365 365 330 348 64 212 300 382 382 313 186 162 162 482 482 105 105 336 470 470 432 330 330 64 64 77 77 342 224 300 334 334 334 59 245 43 43 345 141 141 141 281 9 9 6 6 87 87 87 87 8 321 354 190 288 288 360 360 200 200 183 183 57 57 57 57 53 473 106 410 410 410 410 173 280 29 29 382 245 349 155 155 165 165 165 53 53 10 479 331 84 84 496 274 173 280 29 38 38 162 482 482 105 336 144 180 496 496 496 274 99 99 436 107 60 423 423 349 349 205 155 155 332 332 332 372 372 245 203 473 429 429 429 429 19 19 454 454 417 414 170 170 170 28 491 28 2 2 2 2 491 2 491 491 2 2 2 316 316 316 491 316 316 73 321 321 321 321 7 127 5 5 38 162 342 86 238 6 371 470 171 171 171 252 99 436 436 60 298 275 116 33 250 217 473 65 486 486 486 460 169 169 150 86 238 6 272 300 334 382 245 43 43 364 276 109 278 278 31 342 342 224 302 302 302 302 375 122 122 122 352 419 427 229 247 312 15 15 15 15 15 193 193 193 17 +103-1241-0010 103 793 1 17 13 31 12 13 30 40 31 5 24 19 31 33 15 22 1 18 20 31 13 11 1 24 19 31 19 40 31 29 13 25 31 14 22 15 24 6 16 12 5 33 30 15 25 38 19 34 12 4 33 17 14 23 5 25 11 17 15 37 18 14 19 25 33 5 24 8 10 3 30 21 1 31 13 11 39 36 5 25 11 39 6 30 31 19 31 33 14 38 14 5 11 3 29 33 19 26 18 14 16 14 24 5 25 6 30 16 5 25 5 31 8 23 5 24 1 5 25 11 12 5 33 39 36 38 35 11 9 20 5 23 6 26 16 14 18 14 29 30 13 40 5 25 33 23 20 1 12 4 33 31 6 23 8 25 27 5 9 7 33 19 33 1 4 25 11 8 18 4 37 5 25 17 3 33 13 25 20 24 6 30 6 30 16 5 25 40 22 5 25 31 20 23 11 18 19 30 5 9 7 33 31 1 14 5 5 5 2 3 3 2 4 4 5 2 5 3 6 8 8 5 3 5 5 5 19 5 3 4 2 2 2 3 3 4 3 3 4 4 4 4 4 2 2 4 3 3 3 2 2 2 2 6 3 3 8 5 3 1 2 3 5 3 3 4 2 2 3 1 4 5 5 5 5 8 14 6 4 2 6 6 3 1 2 1 4 3 5 3 4 3 4 4 4 3 3 5 3 1 2 3 4 4 4 2 3 1 3 5 3 4 1 2 3 5 6 3 2 5 13 5 1 2 1 3 3 3 3 2 2 2 2 3 3 4 4 4 4 1 3 4 5 2 2 3 2 3 3 2 6 15 5 5 3 2 6 4 5 4 6 2 3 5 2 4 5 8 9 1 3 6 3 4 2 2 2 3 3 3 1 3 3 4 3 3 6 3 3 3 3 3 2 2 3 5 5 4 2 2 2 5 1 2 9 3 9 21 17 17 17 296 363 363 363 51 51 51 184 491 184 321 7 7 320 127 357 357 443 443 240 271 150 39 86 238 198 198 114 0 222 468 468 313 186 186 162 68 68 115 273 231 231 231 231 53 53 217 473 65 258 38 31 162 68 68 238 6 470 470 470 171 171 171 171 358 358 233 233 321 192 419 439 417 417 417 237 237 47 80 321 321 435 373 451 451 451 30 30 422 162 68 68 115 470 470 120 120 120 37 24 24 404 13 229 491 247 312 126 292 292 292 292 292 21 21 21 408 408 408 149 149 228 321 321 320 7 217 473 258 258 258 31 342 224 494 494 494 31 162 232 105 105 336 470 329 432 330 330 64 77 77 224 300 334 382 245 245 458 144 445 210 210 210 210 210 203 203 53 106 230 426 426 206 169 349 402 198 198 22 283 455 236 161 161 487 487 288 290 290 290 434 434 250 250 345 333 333 220 220 129 127 114 92 92 92 92 167 167 457 32 32 32 259 208 498 498 498 498 498 134 302 302 302 375 175 175 81 89 89 
322 67 394 32 239 144 445 210 210 210 210 210 173 349 402 156 156 156 156 156 467 467 340 340 116 394 465 377 123 123 399 70 46 46 46 46 46 438 236 36 310 107 395 180 499 499 306 306 306 306 306 59 37 37 243 233 75 227 419 427 78 56 491 491 312 292 292 292 23 23 23 408 408 408 391 321 321 373 66 68 115 273 470 120 120 240 314 314 219 219 219 219 152 152 152 374 132 132 88 88 89 89 446 116 212 131 219 222 222 222 387 387 186 186 162 232 172 115 273 278 278 31 31 54 86 238 6 272 300 300 355 132 43 345 347 347 347 347 347 467 467 313 236 236 239 384 180 180 405 405 206 215 215 35 96 272 176 135 135 200 248 58 156 156 156 156 156 59 245 349 155 155 165 165 165 165 53 44 44 44 335 14 14 411 411 153 372 372 372 349 349 352 261 242 242 94 199 459 271 38 162 342 68 115 273 265 265 265 85 146 146 175 175 81 459 203 203 117 404 229 247 126 126 126 326 326 326 326 101 101 149 149 228 321 412 83 55 55 55 322 67 466 45 45 45 45 36 36 107 219 152 152 152 132 378 345 389 389 389 314 239 239 420 420 420 464 255 255 255 251 251 241 431 235 235 235 235 348 200 248 76 393 155 332 332 332 245 156 156 156 156 245 245 129 129 321 74 190 488 488 151 368 453 453 168 11 11 379 64 243 465 26 359 474 474 474 474 19 19 48 417 417 417 417 170 47 491 47 491 47 491 491 47 316 80 289 321 7 7 127 114 92 92 92 92 240 167 77 77 342 168 106 297 297 297 297 297 297 293 175 111 111 111 111 438 438 438 10 479 331 84 84 84 88 88 255 255 255 8 354 180 113 113 113 113 450 285 285 277 277 277 277 24 131 439 417 417 417 417 491 491 47 491 80 80 321 412 83 83 83 194 194 55 55 322 212 34 111 111 111 111 111 438 438 58 72 110 202 202 202 202 202 29 242 116 90 394 239 27 180 405 405 206 240 285 34 475 475 475 475 475 475 301 399 70 138 138 138 138 138 372 245 245 14 411 411 153 153 372 372 372 349 349 155 29 242 275 379 379 471 471 49 142 221 336 144 121 121 379 394 478 68 342 115 444 444 444 213 464 139 139 302 302 497 122 122 131 183 286 286 286 286 406 406 467 467 255 8 354 180 180 113 113 113 450 450 413 37 243 270 270 433 390 390 18 112 56 56 56 312 312 312 15 15 15 15 15 15 15 15 15 15 15 260 260 260 193 193 193 193 17 +103-1241-0011 103 724 1 8 11 27 25 33 5 25 11 14 31 33 4 25 11 1 31 13 11 24 4 34 39 36 18 13 23 29 23 5 31 23 20 1 38 19 32 19 26 12 5 33 24 3 30 19 23 5 38 5 40 4 33 18 4 25 11 33 19 22 27 29 38 19 34 12 5 31 19 10 36 15 32 5 25 1 38 13 23 39 36 11 9 13 33 14 22 38 13 31 10 5 25 12 5 17 14 23 1 31 13 11 12 5 31 33 15 32 5 25 24 4 31 33 14 22 13 30 23 5 31 23 20 1 8 11 13 30 31 15 32 20 23 9 20 15 9 5 23 33 36 19 22 31 29 23 15 25 1 32 20 40 17 3 33 5 33 5 26 5 37 18 14 27 25 12 4 33 31 14 33 5 25 1 23 8 4 5 1 3 3 2 2 3 6 3 9 2 9 15 6 1 4 3 5 3 2 4 5 2 3 4 2 3 4 2 6 14 4 3 4 3 5 2 2 4 2 2 4 3 4 3 3 2 3 3 2 7 5 3 2 2 2 5 6 3 2 1 2 1 2 4 2 5 4 4 6 2 6 33 6 6 4 4 3 2 3 3 3 2 3 2 2 3 4 1 2 1 2 2 7 7 16 6 2 3 1 3 3 3 3 5 2 2 3 4 3 2 2 5 2 4 2 3 4 2 8 25 8 4 3 4 5 5 4 3 3 2 4 4 2 1 2 3 2 3 3 3 2 3 7 5 6 4 3 4 4 1 3 3 5 3 3 2 2 3 6 7 4 2 5 4 6 6 1 2 6 16 17 17 296 296 296 317 317 317 317 317 461 491 461 461 461 461 461 461 461 184 184 289 321 321 209 287 111 111 111 438 438 10 239 384 371 84 84 350 350 413 64 212 131 34 145 319 348 348 212 212 300 494 469 186 162 232 232 232 482 238 6 272 470 470 294 294 294 294 294 294 282 388 388 303 243 75 131 419 439 439 439 78 78 47 47 47 47 491 47 47 47 491 47 47 491 491 80 442 289 66 66 68 179 179 179 179 314 314 196 196 70 65 329 486 329 460 169 164 164 485 485 485 485 132 274 58 58 72 72 72 437 268 139 293 293 215 215 35 26 26 262 262 262 262 262 342 342 26 26 359 474 474 474 
474 19 454 229 247 247 126 126 326 326 326 326 101 101 101 149 149 228 321 7 345 109 109 278 278 99 447 447 107 176 135 135 135 200 200 248 212 212 45 45 45 45 35 196 196 217 70 65 65 329 42 42 380 288 256 256 139 175 175 423 423 423 43 43 345 141 141 281 281 453 168 415 415 415 36 131 119 72 72 72 110 294 294 294 294 294 282 388 388 195 394 76 75 377 87 87 87 129 321 144 27 351 496 496 496 274 215 215 401 401 321 354 333 220 220 198 22 283 38 162 342 342 224 494 494 236 36 107 485 485 485 134 88 418 418 418 418 418 252 99 436 436 60 298 298 298 303 117 48 13 229 321 247 312 312 187 187 12 12 12 12 12 12 260 260 260 260 491 163 163 366 366 491 366 491 366 366 316 316 316 316 321 321 321 435 435 7 7 364 276 109 109 443 443 443 139 139 293 293 293 122 219 219 152 152 152 152 314 314 472 401 259 354 180 443 443 285 285 382 382 245 143 458 208 441 151 151 151 169 150 238 238 272 60 60 242 116 466 22 283 416 144 79 498 498 498 498 499 355 302 375 375 98 98 263 13 417 417 417 417 237 237 237 47 47 47 491 47 47 47 80 80 80 321 435 435 66 115 179 179 179 179 314 198 22 283 455 38 162 86 238 6 470 470 171 171 171 99 99 436 436 60 298 116 33 250 217 473 65 486 460 460 169 150 86 6 272 300 382 313 458 445 445 445 351 351 264 468 468 134 134 175 262 262 262 262 39 342 26 26 359 474 474 474 19 19 454 229 321 321 247 126 126 126 292 326 326 326 326 326 326 21 326 21 326 101 101 101 101 149 149 149 228 321 412 412 287 287 111 111 111 438 438 236 239 384 371 470 264 264 468 468 313 186 162 342 68 115 470 403 403 171 171 422 186 99 338 338 395 494 139 139 497 122 8 420 420 420 420 464 171 171 171 134 8 29 100 497 122 36 377 87 87 87 154 154 154 458 96 96 232 105 105 336 425 386 386 431 290 290 290 290 434 434 434 339 195 117 117 417 417 417 417 225 225 435 435 338 400 400 400 30 422 281 342 342 105 221 144 180 106 189 240 285 34 44 44 236 36 108 119 119 351 319 319 319 348 200 200 69 223 223 130 402 156 156 156 156 406 467 467 350 350 350 350 350 350 413 413 413 195 33 212 198 114 92 92 92 92 92 167 35 478 478 68 68 172 115 273 498 498 498 396 396 385 233 242 242 275 303 303 303 303 48 305 417 78 170 421 421 491 491 128 128 491 128 128 128 193 193 17 +103-1241-0012 103 748 1 24 15 9 20 12 15 38 14 7 33 5 37 9 28 40 5 37 12 5 9 30 4 25 11 39 36 38 3 25 33 5 11 1 18 20 38 6 22 33 1 21 6 25 33 5 23 20 5 38 15 1 9 20 19 26 18 5 26 17 30 20 1 4 25 11 12 20 5 25 16 6 30 10 5 25 5 33 24 4 34 39 36 38 5 40 23 13 16 33 19 11 36 12 4 33 18 38 19 10 38 5 40 18 3 30 11 14 16 14 18 19 24 12 5 25 1 9 19 30 11 19 26 5 23 8 5 25 19 25 19 33 31 11 13 25 1 38 6 22 5 29 33 36 5 17 14 23 1 5 31 33 30 15 25 21 17 14 23 1 5 25 6 30 16 5 25 17 14 23 1 20 4 4 2 3 3 4 4 3 4 3 1 4 4 9 3 2 1 2 2 2 3 5 1 2 2 4 5 2 2 3 3 6 28 5 4 3 7 3 4 2 4 4 3 2 2 2 5 4 4 11 4 3 7 1 4 5 2 4 2 3 6 22 7 2 2 1 3 3 3 4 2 3 3 2 1 3 2 4 6 4 3 4 3 2 4 3 4 2 3 2 5 14 5 7 4 4 2 2 5 2 1 4 5 2 3 2 3 3 2 6 3 7 3 3 7 2 2 4 4 2 2 4 4 6 11 2 2 2 1 3 4 4 3 7 10 23 7 7 5 4 5 4 4 3 4 8 10 15 6 6 3 2 6 2 3 4 6 6 1 4 4 7 3 5 1 6 3 7 6 8 17 17 17 296 296 52 52 52 52 52 52 52 363 101 101 51 51 228 491 289 321 7 473 473 329 476 171 252 378 337 337 324 301 216 216 0 0 0 301 378 43 345 347 347 347 406 467 145 113 113 113 113 285 34 223 462 402 402 221 401 259 354 153 153 153 153 387 387 387 318 318 185 453 9 168 69 223 198 198 22 283 455 129 354 190 380 499 288 365 365 282 299 64 212 131 219 219 152 152 152 378 43 364 276 174 174 319 319 348 64 76 449 191 191 191 191 24 131 404 417 417 417 417 417 237 237 237 491 237 28 491 28 362 491 102 362 102 362 362 362 491 366 491 316 
491 491 316 316 435 435 321 321 373 451 451 30 30 301 378 364 276 276 346 346 405 405 405 405 206 178 96 96 227 472 472 472 472 401 75 310 107 395 180 329 426 426 206 348 76 465 26 26 359 359 359 474 474 324 464 464 255 255 255 43 364 276 109 109 403 403 403 207 207 207 19 19 454 197 197 80 80 321 321 320 7 354 420 420 420 360 360 360 135 135 135 200 200 248 58 58 72 437 319 319 319 348 348 64 248 212 79 495 334 41 41 41 19 454 229 247 247 126 126 126 326 326 326 326 326 326 326 326 326 326 101 101 101 149 149 149 228 321 412 83 194 194 194 194 322 388 67 466 127 448 448 448 464 319 319 319 348 90 90 205 261 25 148 148 148 387 396 186 310 107 60 60 298 94 11 11 11 457 457 217 217 473 65 486 486 486 460 460 169 169 164 164 485 485 485 485 485 374 132 43 43 345 141 141 141 281 342 26 26 251 241 431 443 443 169 169 402 402 6 272 377 87 87 236 239 239 384 371 371 374 374 374 374 132 132 132 132 132 132 132 132 197 197 321 127 114 114 92 92 92 92 92 460 167 385 35 75 227 472 397 397 345 407 407 407 407 407 310 447 397 397 141 141 141 281 54 9 142 72 72 72 437 306 306 306 306 396 396 285 300 382 382 349 205 155 332 332 332 58 58 183 183 183 57 57 57 57 203 53 381 381 195 394 212 198 127 114 89 446 446 67 394 394 32 32 32 401 401 321 75 354 485 485 286 286 286 468 396 313 325 325 176 135 135 200 200 199 44 44 44 251 251 251 251 241 241 431 265 480 480 480 85 85 85 146 464 464 275 275 388 94 199 340 340 199 199 154 154 154 36 77 342 342 86 221 336 321 384 384 371 93 120 120 120 120 330 388 303 195 195 303 117 404 404 78 78 491 491 312 312 292 292 292 12 12 12 23 23 260 260 260 260 260 391 391 391 491 289 289 321 7 7 7 364 276 276 346 346 405 405 405 206 178 35 35 458 192 180 230 230 230 230 215 215 35 96 401 75 108 377 123 123 123 88 44 44 44 416 416 239 144 79 498 498 498 498 498 498 134 302 375 375 375 375 98 98 98 13 417 417 417 417 237 237 47 47 47 491 491 80 80 491 289 321 321 287 287 44 44 44 38 162 232 482 482 482 238 6 161 161 79 487 288 290 290 290 434 434 339 339 310 107 447 221 144 79 498 498 498 498 498 302 302 375 375 98 98 225 483 226 209 44 44 44 44 33 335 14 14 411 411 153 153 372 372 372 396 349 349 234 261 261 242 242 116 116 33 90 90 212 239 79 79 498 498 498 498 134 302 375 375 375 98 13 229 321 247 15 15 15 193 193 193 17 +103-1241-0013 103 740 1 4 25 11 19 24 4 25 11 5 37 18 14 38 8 32 20 38 5 40 5 25 33 5 9 28 1 24 4 34 39 36 17 30 27 25 11 19 25 31 29 19 30 19 33 13 40 18 20 33 14 25 11 5 9 7 33 5 25 11 32 5 16 5 23 11 1 21 13 25 33 23 20 11 7 25 12 5 29 23 4 33 16 6 30 24 33 6 30 11 40 18 14 1 32 20 18 4 11 9 19 25 38 3 10 19 26 18 19 24 13 37 14 31 19 25 31 18 20 18 4 11 29 4 31 33 18 14 1 4 25 11 32 20 18 4 11 18 14 8 40 3 25 18 19 24 25 7 1 24 4 34 39 36 38 5 40 25 3 33 23 35 22 19 26 4 33 18 14 1 10 8 2 3 3 4 5 2 3 2 2 4 6 7 7 4 3 4 2 3 2 1 2 2 3 15 32 7 8 6 3 5 4 6 9 3 2 3 4 8 3 1 8 2 3 2 2 2 3 6 7 2 1 2 2 9 4 1 3 3 6 4 4 2 5 3 2 6 2 4 3 2 4 4 5 3 1 3 3 3 4 4 3 2 2 3 4 4 3 4 2 2 8 40 5 4 2 2 3 2 2 3 4 4 4 2 2 4 3 2 6 4 3 5 3 4 2 3 1 2 1 3 6 5 5 1 2 8 14 6 1 2 3 3 5 3 2 4 6 8 5 4 2 3 3 2 4 15 24 6 6 2 3 3 2 2 4 3 4 2 5 2 3 3 4 5 2 2 7 10 17 17 17 363 363 363 51 149 228 228 321 83 83 194 194 194 194 322 67 64 212 212 34 44 44 44 217 217 473 65 486 365 365 460 330 388 64 212 131 34 223 223 130 402 402 156 156 156 156 59 59 59 245 245 43 43 364 276 346 346 346 346 346 265 85 85 146 438 186 338 338 400 400 30 378 378 345 141 141 141 141 281 453 242 242 116 64 131 34 44 44 8 354 354 153 153 153 153 387 387 387 387 207 207 207 98 48 417 417 417 417 170 170 491 28 28 28 28 2 
2 2 491 2 2 491 2 491 2 2 2 366 366 491 316 316 316 316 491 73 491 289 321 7 7 217 217 473 65 486 486 486 486 460 460 169 169 169 164 164 164 219 219 219 485 485 374 374 132 132 32 32 321 208 208 79 79 380 380 288 84 496 496 496 496 274 274 413 413 413 413 64 212 212 34 34 340 340 116 33 394 478 478 232 232 232 232 232 105 105 336 354 470 286 278 498 468 468 468 468 468 467 277 385 325 449 34 253 253 253 453 168 30 30 30 422 129 75 108 119 308 308 308 308 308 308 308 396 313 64 212 131 255 255 8 354 180 113 113 113 113 113 450 450 413 24 36 449 89 89 116 33 33 394 90 338 338 338 338 338 338 395 189 151 151 169 349 349 352 29 302 302 302 302 497 122 122 122 314 401 401 401 321 310 107 107 395 432 432 432 330 379 64 76 36 26 26 359 474 474 474 324 301 239 384 371 180 315 315 315 450 450 413 466 466 22 283 455 455 259 74 425 386 431 486 486 460 460 167 35 393 205 321 155 148 148 148 387 387 203 53 90 90 75 119 441 441 153 153 153 153 372 372 37 314 77 77 342 9 224 156 156 156 156 59 452 452 263 229 247 247 312 312 312 292 292 292 292 292 292 292 1 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 260 260 260 260 408 408 391 391 321 321 373 373 400 400 400 30 30 58 110 254 254 254 254 314 35 259 137 137 137 137 399 250 250 276 346 346 346 206 206 240 310 310 107 395 176 135 135 200 248 183 57 57 57 57 57 53 473 335 14 411 145 463 463 463 463 29 29 382 313 186 186 162 68 68 273 278 278 278 330 379 394 77 342 342 451 30 30 30 464 254 254 254 131 129 321 74 437 311 311 311 311 311 311 460 169 150 86 86 238 6 272 272 334 334 334 59 59 452 452 229 321 247 126 126 126 292 292 292 326 408 408 149 149 228 321 321 209 83 55 55 322 322 67 310 400 400 400 30 30 3 58 72 72 110 110 254 254 254 240 131 58 183 156 156 156 156 245 335 14 411 265 265 265 265 265 85 85 146 318 368 342 168 168 125 125 125 125 348 199 183 183 57 57 57 57 53 53 10 10 479 331 331 315 315 315 315 315 450 450 450 450 98 98 13 13 78 170 170 170 170 28 28 2 491 2 2 491 2 2 2 2 316 491 316 316 316 316 289 289 289 321 7 7 217 473 329 329 329 329 329 329 329 164 164 485 485 485 301 378 378 345 141 141 141 281 9 238 221 196 479 307 307 307 61 167 167 457 457 251 251 241 367 367 367 367 458 192 176 135 135 200 200 464 464 415 415 415 415 415 240 285 156 156 156 156 59 452 452 229 321 247 312 15 15 15 15 193 193 193 17 +103-1241-0014 103 756 1 5 10 8 23 11 5 37 5 9 7 33 19 23 13 37 5 25 1 17 3 30 9 11 19 25 5 37 13 30 20 32 6 30 33 1 37 13 30 20 33 8 33 1 37 13 30 20 5 17 23 20 11 30 13 31 5 37 39 13 23 27 19 32 17 30 15 38 19 25 31 20 1 32 20 38 6 30 5 16 15 11 19 11 9 30 7 25 31 15 23 14 18 4 33 1 5 25 11 9 19 25 20 34 12 5 18 4 33 1 19 22 31 33 13 25 11 19 26 11 7 25 18 14 9 4 22 38 14 33 36 9 30 15 11 40 5 37 13 30 20 34 19 22 1 11 19 31 8 11 5 11 23 20 30 13 11 1 18 13 30 1 14 5 6 7 3 2 1 3 2 3 4 2 2 6 2 3 3 8 23 4 6 3 2 3 2 2 2 4 3 4 3 9 4 3 6 5 5 3 4 4 7 6 8 4 4 3 4 7 4 4 5 4 3 2 4 7 2 2 5 4 3 5 2 7 3 3 8 5 2 4 6 8 26 6 3 4 4 3 3 6 7 2 3 4 4 3 8 3 6 4 5 3 4 6 8 2 5 2 1 3 3 3 5 4 2 1 7 5 7 12 4 3 4 3 3 4 2 2 4 3 6 2 5 2 4 8 8 2 3 7 7 3 3 8 4 3 2 5 3 3 5 7 5 8 6 2 3 8 5 3 3 3 2 5 7 6 8 1 7 2 11 17 17 17 17 296 491 317 317 491 491 184 184 184 321 320 412 44 44 44 44 36 310 107 107 395 437 91 91 91 91 85 85 139 139 293 122 122 34 69 223 130 280 44 44 8 8 354 180 113 113 113 285 285 34 44 251 251 251 241 241 431 443 443 173 173 280 242 275 275 116 195 117 117 48 417 414 170 491 170 491 321 211 211 312 312 292 292 326 326 326 326 23 101 101 101 149 391 491 289 289 321 144 144 106 499 306 306 306 306 306 396 396 215 215 96 36 272 272 
340 340 94 199 44 44 44 4 280 104 104 104 104 104 468 337 337 337 337 422 422 99 338 338 338 338 395 395 153 153 153 153 372 372 396 385 385 36 227 419 225 225 80 80 491 321 321 7 7 32 4 104 104 104 104 104 468 337 337 337 324 422 143 36 108 119 119 437 437 265 265 428 428 428 146 358 358 233 75 227 419 225 225 225 80 80 80 80 7 7 4 280 104 104 104 468 468 337 337 337 324 3 14 14 411 411 284 319 319 240 416 416 96 134 134 359 359 81 166 166 324 301 236 239 161 161 79 288 288 151 151 271 271 39 342 342 342 224 462 462 462 462 402 402 219 219 219 219 180 180 443 139 175 175 81 84 496 88 88 109 459 459 459 99 99 447 447 447 221 336 144 208 79 380 288 403 403 403 171 324 3 301 301 43 364 364 345 109 278 278 116 33 394 77 77 342 68 342 224 41 41 41 41 19 19 454 229 321 247 247 312 126 292 292 292 292 292 292 23 23 23 23 23 23 23 260 260 260 260 260 260 391 391 228 321 373 373 400 400 400 400 301 378 43 364 276 210 210 210 372 372 372 467 44 44 44 349 349 234 234 261 261 25 470 171 171 171 171 252 252 325 34 191 191 191 314 131 472 401 401 321 259 190 190 380 380 315 315 315 315 450 450 450 413 348 394 478 478 232 232 172 115 273 470 171 171 171 252 175 81 300 300 382 245 58 58 72 110 110 486 486 486 460 460 460 169 385 233 227 227 419 225 225 225 225 412 412 83 55 55 322 67 212 401 321 354 255 255 116 94 94 398 213 213 213 213 252 164 164 164 164 164 283 283 455 72 72 72 110 486 486 486 486 460 282 282 385 385 227 227 419 427 321 247 126 126 326 326 326 326 101 408 149 149 321 412 154 154 154 143 96 96 66 232 68 238 6 272 470 432 432 432 330 64 64 64 212 176 135 135 200 200 248 248 212 384 180 315 315 315 315 450 413 348 199 58 156 156 156 156 156 245 8 8 354 180 376 376 376 376 376 376 460 460 169 178 233 321 208 133 397 345 347 347 313 313 236 36 36 108 119 119 485 374 374 374 374 132 132 132 8 259 259 190 190 380 288 288 403 171 171 171 246 246 318 24 24 270 270 342 168 168 462 462 4 4 4 4 104 104 104 104 104 468 337 337 337 324 324 422 349 164 164 164 164 164 25 278 278 278 278 278 178 143 321 192 192 419 225 47 491 80 80 80 80 321 75 371 490 490 490 162 232 232 68 68 115 273 273 265 265 265 428 146 146 325 34 191 191 191 314 26 26 359 166 166 166 324 301 42 42 42 147 147 147 380 288 288 443 120 120 120 37 37 24 24 131 404 225 225 225 80 321 373 72 72 72 110 264 264 264 264 264 264 59 59 59 59 452 263 13 78 170 170 491 421 491 491 211 491 421 491 15 15 15 193 193 193 17 +103-1241-0015 103 700 1 18 14 16 15 31 38 5 40 31 24 6 23 1 38 8 33 1 5 25 11 34 19 25 1 6 23 31 27 24 5 10 16 30 13 22 5 23 11 1 18 14 24 7 34 38 5 40 23 3 30 21 1 5 25 11 31 27 38 14 18 14 8 40 1 38 19 10 23 35 22 33 17 30 20 25 19 25 31 5 24 23 8 33 31 5 25 11 24 36 11 40 1 4 25 11 17 30 15 19 25 5 12 14 40 1 31 27 16 3 30 12 20 6 30 11 5 25 13 30 20 5 9 40 14 37 14 1 4 25 19 22 31 33 30 6 30 11 5 25 13 30 20 5 9 40 14 37 14 1 18 5 2 5 6 7 1 2 2 8 3 1 14 5 8 7 9 1 2 4 2 7 4 7 11 6 4 6 4 5 4 3 6 2 1 5 2 5 5 18 5 2 6 10 7 2 1 5 4 6 5 10 1 4 2 2 5 4 3 2 4 6 13 7 1 2 2 6 3 3 3 6 3 3 6 2 3 3 6 3 4 3 6 4 2 1 2 1 3 8 3 6 9 7 2 3 3 5 6 3 3 4 4 5 11 40 7 7 3 4 3 3 3 5 3 1 2 1 2 2 4 2 3 5 3 3 8 21 5 2 2 3 2 4 2 2 2 2 1 2 4 3 4 2 3 5 4 3 7 5 17 17 17 296 363 363 363 52 52 52 52 52 408 51 51 51 184 289 321 320 156 156 156 156 245 349 205 205 261 343 343 343 343 343 343 252 186 39 342 86 86 142 397 141 141 141 281 162 232 232 232 482 482 105 105 196 70 65 65 481 481 481 481 182 182 182 375 375 375 98 98 98 225 225 80 80 491 80 80 321 7 7 364 276 276 346 346 428 428 428 428 146 146 358 358 233 321 227 227 419 225 89 483 321 188 89 446 
446 33 394 394 76 164 164 164 164 164 278 278 278 278 120 330 303 303 303 303 117 48 48 417 47 47 491 491 80 80 80 80 289 321 320 287 287 297 297 297 297 293 293 186 162 54 68 115 224 273 84 84 84 274 399 399 70 70 383 383 383 383 167 310 107 447 447 447 393 234 261 25 380 288 151 178 178 458 458 208 302 302 302 302 375 375 122 122 227 419 419 427 82 321 312 312 126 292 292 292 292 23 23 23 101 101 101 149 228 228 321 320 373 156 156 156 245 399 217 217 70 473 65 315 315 315 315 315 450 450 293 169 352 352 352 352 97 397 397 345 345 141 141 281 453 9 26 26 251 241 241 431 284 306 306 306 306 306 306 396 396 396 37 233 36 310 107 107 447 18 97 97 225 321 412 83 55 55 446 67 394 478 66 172 115 273 273 84 410 410 410 43 29 347 347 245 245 58 156 156 156 245 14 14 14 411 287 265 265 265 265 265 265 85 85 85 207 207 318 185 269 433 433 160 97 397 397 345 407 407 407 407 310 107 447 447 26 251 241 367 367 367 367 458 96 96 272 472 472 221 401 321 321 208 79 79 288 288 360 360 360 360 434 434 339 199 340 340 340 116 33 394 478 68 68 172 115 273 319 319 319 203 53 53 251 251 241 431 428 428 428 428 146 146 385 77 77 342 224 89 89 116 33 250 217 217 473 65 374 374 374 132 132 37 37 24 321 270 433 160 112 427 229 247 312 126 326 326 326 101 101 149 149 321 412 412 83 55 55 446 446 67 131 472 472 458 208 208 79 79 380 288 403 403 403 403 207 324 464 464 464 446 116 94 199 493 493 493 493 493 216 300 334 334 304 304 304 185 185 269 323 18 112 112 56 56 56 170 170 28 28 28 491 491 28 28 362 491 362 362 362 491 491 362 362 362 362 40 40 362 218 491 491 305 305 366 366 366 305 366 435 435 435 435 435 435 321 435 435 373 373 66 68 172 115 344 344 344 344 344 274 274 349 205 261 25 106 306 306 306 353 396 313 216 22 448 448 448 14 411 411 153 153 153 387 387 313 314 196 196 398 134 134 468 337 337 337 464 464 255 255 215 96 368 453 453 168 273 498 498 498 396 173 29 29 334 334 59 452 452 263 13 229 491 442 312 312 312 292 292 292 292 292 326 326 326 326 326 326 101 408 149 228 321 305 209 287 44 44 94 199 154 154 154 96 96 482 482 238 6 161 79 153 153 153 153 387 396 240 314 196 309 199 264 264 264 468 468 468 337 337 464 464 464 255 255 215 96 478 342 9 168 470 498 498 498 396 173 173 280 29 334 334 59 452 452 229 321 247 15 193 193 17 +103-1241-0016 103 733 1 24 8 33 18 4 37 31 20 25 12 5 33 12 5 10 19 25 38 5 40 37 13 30 20 29 28 25 33 19 11 5 25 11 29 30 5 25 7 25 31 33 1 12 4 33 12 5 9 19 17 8 40 38 14 16 35 23 5 37 31 29 19 30 19 33 5 25 11 37 19 37 4 31 19 33 20 1 12 5 33 12 5 24 7 34 38 5 40 31 38 20 33 23 19 29 33 5 25 11 19 22 31 29 30 13 31 19 37 1 12 5 33 12 5 16 6 30 18 13 11 38 5 40 9 30 6 11 5 25 11 16 35 23 1 19 25 32 6 30 33 1 3 30 11 19 31 14 25 19 26 19 22 31 33 30 6 30 11 5 25 13 30 20 5 9 40 14 37 14 1 24 8 33 18 4 37 22 5 25 22 23 36 11 19 11 1 6 5 5 2 1 2 3 5 5 3 2 2 2 1 2 8 3 4 2 2 5 3 2 4 4 9 5 3 3 3 2 2 2 2 3 2 2 2 7 5 5 5 15 3 2 3 2 2 4 4 5 8 5 3 2 6 2 3 3 3 7 3 2 4 3 2 2 1 2 2 7 2 5 5 2 3 11 13 3 3 1 2 2 4 10 4 2 2 2 7 3 4 3 4 3 5 3 1 2 2 1 4 3 3 2 2 7 5 6 16 3 2 3 2 2 7 4 3 5 5 4 4 3 4 4 4 11 2 2 2 2 8 3 11 14 7 2 7 5 4 7 10 7 5 3 2 6 5 2 3 4 2 6 1 3 2 2 3 1 2 1 3 2 1 2 3 5 4 4 7 19 5 4 2 1 2 3 2 2 3 5 3 4 2 4 4 4 17 17 51 51 228 289 321 7 70 70 65 428 428 428 146 146 325 449 202 202 202 202 402 162 232 172 172 267 267 267 267 267 434 434 339 248 248 212 45 45 45 45 198 22 5 455 236 129 321 310 107 107 395 395 278 278 330 116 195 250 250 345 141 141 281 281 453 9 142 4 4 4 104 104 104 104 104 468 337 337 324 301 143 129 401 321 74 74 441 441 441 441 441 387 360 360 360 252 339 
76 465 449 191 191 191 191 24 325 89 89 446 33 394 76 74 190 492 492 492 313 94 94 331 331 315 315 315 315 450 450 450 413 413 243 243 77 433 86 238 6 6 227 427 427 491 247 126 126 292 326 326 326 326 101 408 149 228 228 321 320 321 127 45 45 45 45 35 35 127 5 5 455 236 129 259 354 278 278 278 278 416 416 192 180 106 265 265 265 265 85 85 85 146 318 49 9 142 397 347 347 347 347 245 349 349 205 261 261 25 424 424 424 175 81 462 462 462 130 402 478 162 232 232 232 232 105 105 336 336 354 485 278 498 468 468 467 277 277 469 325 449 89 89 446 53 394 212 280 106 265 428 428 146 438 173 280 280 486 486 486 460 169 150 150 342 342 224 469 469 325 449 41 41 41 41 19 19 19 454 454 454 78 491 170 491 312 187 187 292 12 408 408 408 149 228 321 321 127 45 45 45 45 35 198 22 5 455 399 217 473 65 315 315 315 315 450 450 450 169 169 164 164 397 397 397 141 141 141 281 31 162 232 232 68 68 482 397 397 397 109 213 213 213 252 252 36 26 26 26 251 241 431 278 278 278 215 215 35 96 96 465 272 89 89 446 67 131 34 154 154 458 96 96 54 142 105 336 336 190 380 288 151 151 169 150 342 342 224 224 494 459 459 459 459 37 173 352 352 352 427 491 247 126 126 126 326 326 326 326 101 101 101 149 149 228 321 321 320 127 45 45 45 45 35 35 198 22 5 455 349 205 234 234 261 25 148 148 148 148 372 372 372 396 58 72 72 110 110 120 120 120 120 240 24 24 133 133 364 345 141 141 141 141 281 281 9 142 221 336 336 259 190 190 380 380 499 499 499 405 426 426 426 426 206 206 206 37 24 34 89 89 446 116 394 90 393 234 234 234 234 261 25 441 424 424 424 182 182 375 375 375 98 98 13 13 417 170 170 47 491 47 491 491 491 2 491 491 316 73 289 289 320 412 188 188 340 340 116 33 394 478 338 338 338 338 395 470 153 153 153 153 387 372 396 396 385 233 227 227 419 439 417 78 47 47 491 47 47 491 80 321 321 80 321 435 209 287 287 353 353 353 353 396 313 236 36 384 490 490 490 31 162 68 115 115 273 308 308 308 396 313 94 94 176 176 135 328 200 200 199 255 154 154 129 401 321 96 66 482 238 272 79 153 153 153 387 387 396 314 196 196 479 398 398 264 264 468 467 467 255 255 215 96 478 342 68 115 273 498 498 498 498 396 173 173 29 29 334 334 59 59 452 263 229 247 247 126 126 326 326 326 326 326 326 326 101 101 101 149 149 228 321 289 320 7 70 65 65 389 428 428 146 240 325 34 202 202 202 402 221 458 27 121 121 121 33 394 76 259 208 208 386 386 386 444 374 374 374 252 325 34 191 191 191 37 24 404 427 229 247 193 193 17 +103-1241-0017 103 794 1 38 5 40 31 27 23 36 11 5 22 30 5 31 23 20 5 16 30 15 11 1 24 4 34 39 36 18 7 13 37 14 1 38 5 40 31 29 13 30 11 12 20 6 30 11 20 23 5 37 31 29 20 22 19 26 16 14 31 33 1 16 6 30 13 40 31 36 25 13 40 32 20 22 5 25 22 23 36 11 19 11 12 5 33 18 20 38 5 40 22 5 24 19 26 33 36 18 14 1 32 20 31 33 35 11 5 29 1 17 30 4 31 29 19 26 38 19 34 38 5 25 34 19 25 9 30 7 25 18 4 25 11 1 12 5 18 4 25 11 5 23 5 37 5 32 4 9 20 27 23 11 16 4 32 5 25 11 22 3 30 29 5 33 9 4 17 1 12 20 5 12 14 32 20 18 13 23 11 7 33 19 18 19 24 1 8 4 1 4 5 2 5 4 2 2 3 2 2 5 2 4 2 5 3 7 5 45 5 6 4 4 4 5 4 4 4 8 4 5 2 3 4 3 4 4 3 3 3 3 4 4 6 4 1 3 4 4 3 3 2 6 4 7 4 4 13 6 3 3 2 3 4 3 3 2 2 3 3 3 2 3 4 3 3 2 3 2 2 1 2 3 3 2 1 4 5 3 2 2 4 5 3 3 7 12 6 3 6 2 3 2 6 8 6 3 2 5 4 3 2 4 2 1 4 4 5 5 9 6 8 4 2 8 3 8 7 3 4 12 2 2 6 3 5 2 3 2 2 2 2 9 5 3 8 6 3 2 7 4 6 2 2 3 6 2 3 2 3 2 4 8 5 11 3 4 5 5 3 6 3 4 4 3 2 6 4 2 5 4 7 14 17 17 17 363 51 51 184 320 320 345 333 333 220 220 402 66 66 68 115 344 344 344 344 274 274 251 251 241 431 374 374 374 374 285 34 469 469 143 458 208 79 459 459 271 31 342 86 26 26 166 166 166 464 464 464 255 255 349 234 234 261 190 380 288 
288 403 403 403 207 207 207 37 24 24 404 439 417 417 417 170 170 28 28 491 28 491 362 491 491 362 491 362 491 491 491 362 362 491 362 362 491 40 211 369 369 369 369 21 21 21 21 21 21 21 408 408 408 149 149 228 321 321 320 7 217 70 65 486 486 486 460 460 169 169 164 164 164 485 485 485 485 374 132 132 58 58 72 268 268 268 268 268 88 88 109 84 463 463 463 173 280 29 334 59 59 452 263 263 417 417 417 417 80 321 321 7 7 345 141 141 141 281 162 232 232 232 482 105 336 336 470 470 264 264 264 264 468 468 313 313 314 314 198 22 448 448 448 464 106 106 372 372 372 313 236 236 36 371 485 213 286 286 286 139 302 302 175 175 69 223 223 130 478 232 232 105 105 336 321 354 470 213 213 252 143 192 176 135 135 135 200 248 248 248 393 205 261 25 498 498 498 498 498 396 271 271 39 54 86 238 6 427 427 247 247 126 126 326 326 326 326 101 101 101 149 228 321 321 373 155 155 155 332 332 332 372 372 372 467 253 253 38 162 232 172 172 115 485 374 374 374 348 94 199 253 253 253 99 338 400 400 400 30 422 143 144 27 121 121 121 394 76 76 208 208 386 386 444 444 374 374 252 325 191 191 191 37 314 198 198 45 45 45 183 183 451 30 30 301 378 345 141 141 281 342 342 221 336 144 27 27 351 319 319 319 53 53 176 135 135 200 248 76 75 108 377 123 123 123 123 132 58 156 156 156 156 59 59 452 229 229 247 126 126 326 326 326 326 101 101 408 149 228 321 321 373 400 400 400 30 422 422 162 482 482 482 482 238 272 189 189 189 189 285 34 230 230 230 230 230 215 215 35 74 419 439 439 78 78 47 47 80 80 80 289 289 320 208 79 499 486 486 460 460 169 150 342 105 105 336 354 176 176 135 135 200 248 248 333 333 220 220 220 142 133 364 276 174 174 174 174 319 319 348 348 195 195 90 90 90 393 234 234 234 234 261 261 25 470 278 278 278 330 330 388 195 195 195 250 250 394 32 32 259 354 190 380 380 315 315 315 315 450 450 450 413 413 33 58 58 72 72 72 294 294 294 294 294 294 294 294 294 282 282 388 195 64 212 131 427 321 247 126 126 326 326 326 326 101 149 149 228 321 320 22 5 455 455 72 72 72 294 294 294 294 294 388 348 64 64 212 212 26 302 302 302 175 69 69 69 130 280 44 44 44 99 338 338 338 338 338 395 470 486 486 486 460 460 215 354 41 324 324 324 3 335 14 226 411 411 424 424 424 424 424 424 274 122 122 131 472 393 234 234 261 25 486 486 486 460 460 169 99 436 436 436 60 242 116 116 33 212 131 472 221 321 144 27 437 437 306 306 306 460 215 35 29 469 277 277 314 401 401 321 354 180 376 376 376 376 376 282 207 37 24 192 192 427 321 247 126 126 23 408 408 408 149 228 321 321 320 127 448 448 448 14 14 411 493 493 493 493 493 216 127 300 334 334 59 452 186 99 338 338 400 400 400 30 422 58 58 72 110 110 139 139 139 293 293 122 122 34 180 113 113 113 113 167 167 36 449 123 123 123 123 183 183 57 57 57 57 57 203 381 381 381 48 48 417 417 417 170 421 421 421 421 491 128 491 128 128 193 193 17 +103-1241-0018 103 471 1 8 31 5 29 27 40 39 36 3 30 24 19 31 33 14 24 4 34 39 36 22 5 34 9 14 33 5 37 17 30 20 25 17 15 9 5 23 40 1 32 20 31 13 11 19 25 5 29 19 22 39 36 23 39 14 23 20 22 23 19 30 31 38 20 33 37 28 31 1 8 24 37 13 30 20 17 23 4 11 33 19 31 20 39 36 1 8 38 5 40 9 19 17 19 25 19 26 33 19 9 20 5 16 30 15 11 39 36 38 14 25 33 22 5 24 19 26 16 14 24 20 1 18 7 3 2 4 6 3 1 2 3 3 2 2 3 2 2 4 3 3 2 2 5 3 4 4 3 2 2 2 3 2 3 3 2 5 2 3 3 10 20 6 3 5 3 2 2 2 3 3 2 7 2 4 2 3 2 3 4 9 3 6 6 8 3 3 3 4 7 8 16 8 2 3 2 3 3 3 3 3 1 2 2 6 2 6 5 16 7 2 2 3 1 3 4 2 2 2 3 2 2 3 5 2 5 2 4 2 2 3 4 2 2 2 5 2 2 2 4 4 3 3 6 5 17 17 17 296 317 317 317 491 491 491 491 491 461 184 321 435 435 321 435 287 287 111 111 111 438 162 342 224 494 494 236 74 470 496 496 496 496 496 
274 368 368 9 219 152 152 152 88 353 353 353 245 399 70 473 258 31 54 86 238 272 272 300 245 399 217 473 65 486 460 460 169 164 485 485 485 382 422 458 458 144 27 437 437 151 169 169 164 402 221 401 321 354 29 498 313 313 325 34 462 462 130 402 321 259 79 79 288 360 360 360 200 200 248 445 445 180 171 171 171 252 215 8 354 100 302 375 497 98 185 269 433 390 160 112 112 56 491 312 312 312 187 187 12 12 12 12 12 12 12 23 260 260 260 260 391 391 391 491 321 373 338 400 400 400 400 30 422 162 68 68 115 470 470 120 120 240 314 196 340 116 199 44 44 44 129 259 74 492 236 129 321 445 445 485 485 485 485 485 485 374 374 132 359 81 485 485 134 382 134 359 359 81 166 166 324 422 143 401 401 321 321 144 208 208 208 386 386 386 286 286 286 286 286 286 334 382 59 304 313 186 162 66 482 482 482 482 105 397 336 109 213 213 213 252 143 131 472 393 393 261 343 343 343 343 343 343 343 358 39 39 433 433 160 427 56 247 247 312 126 292 292 326 326 326 326 326 101 101 101 149 149 321 412 287 287 111 111 111 356 356 53 394 212 4 104 104 104 104 104 337 337 337 301 143 144 208 386 431 376 376 376 240 24 36 87 87 87 162 232 172 115 267 267 267 267 267 219 219 477 477 477 477 477 132 13 229 491 247 312 126 292 292 292 23 23 23 101 101 101 149 228 321 412 412 287 111 111 111 378 378 141 141 281 453 142 221 336 420 420 420 416 458 445 485 360 360 360 94 176 135 135 248 76 108 377 87 87 129 354 420 420 420 464 464 44 255 38 349 205 205 261 487 288 288 288 171 171 252 24 131 219 152 152 152 378 43 345 347 347 372 396 313 457 131 221 458 144 27 351 351 319 319 203 53 176 135 328 200 248 393 205 155 332 332 332 332 245 399 429 429 429 429 19 19 229 247 247 126 193 193 193 +103-1241-0019 103 151 1 8 18 4 11 24 15 11 5 29 24 8 24 8 25 11 1 12 5 33 19 16 39 36 11 19 11 5 25 22 5 24 16 14 24 20 33 19 25 8 33 1 14 6 4 2 2 4 3 1 4 3 2 5 5 6 2 3 9 3 2 1 3 2 2 2 4 2 3 2 2 5 2 4 3 3 3 2 4 2 3 9 4 4 17 491 211 491 296 296 363 363 326 101 101 149 149 228 321 321 287 111 111 111 438 58 110 254 254 254 314 196 196 217 473 476 476 476 252 325 34 230 230 230 215 35 196 196 46 46 46 46 438 399 399 217 70 65 480 480 480 480 85 299 299 339 212 131 427 229 247 126 326 326 326 101 149 149 321 321 320 45 45 45 325 118 118 118 118 402 219 152 152 422 236 239 384 371 278 278 314 196 196 242 242 33 90 465 144 27 351 351 319 319 203 53 394 76 205 155 332 332 332 399 399 429 429 429 422 143 108 377 377 87 236 10 479 331 331 428 265 428 428 428 146 207 358 233 131 419 321 247 15 193 193 +103-1241-0020 103 485 1 8 38 35 11 5 25 33 9 20 5 9 19 33 5 16 30 15 11 1 4 25 11 19 33 38 35 11 9 20 23 5 37 23 20 33 19 31 23 20 29 19 25 5 38 8 23 11 1 10 13 30 20 33 30 20 6 23 38 8 33 38 19 34 9 23 36 24 19 25 12 5 24 36 25 32 8 25 11 27 25 33 39 36 34 19 26 22 1 39 36 22 35 11 19 24 4 21 5 25 39 36 38 14 11 38 13 23 19 26 19 25 24 3 30 9 5 23 18 6 23 40 22 35 11 5 25 33 39 36 1 11 8 3 2 1 2 1 2 2 4 2 4 2 2 3 6 2 6 4 12 7 1 2 2 2 1 2 2 2 4 9 3 6 2 3 3 2 5 2 3 3 1 2 3 5 7 4 2 2 8 3 4 3 4 3 5 7 7 4 6 5 1 2 5 4 3 8 3 2 2 1 2 4 6 3 6 6 3 3 2 1 4 2 1 4 3 5 6 26 4 2 4 1 2 2 4 5 4 2 2 2 3 2 2 2 4 2 2 3 2 2 3 6 4 2 2 2 3 6 9 4 4 3 3 1 2 2 2 4 6 11 17 17 17 363 363 363 51 149 228 228 321 321 287 287 111 111 111 111 378 378 43 389 389 389 314 242 242 394 394 32 259 420 420 420 420 464 44 44 236 129 354 354 278 278 325 34 300 255 349 349 234 234 261 190 487 288 288 288 403 171 207 207 37 24 131 427 491 247 126 126 326 326 326 326 101 149 149 149 228 321 209 83 55 55 55 55 322 94 199 177 177 177 457 389 389 389 314 259 259 420 420 420 301 301 251 251 251 251 251 251 251 
241 266 266 266 266 266 173 402 402 26 359 359 81 166 324 422 36 377 87 87 38 162 232 172 26 26 359 444 444 213 252 8 354 89 340 116 199 44 44 43 43 364 276 346 346 346 265 85 85 85 139 139 293 293 122 122 35 401 401 401 75 310 107 107 107 395 395 351 264 264 264 468 468 406 337 337 324 252 143 36 161 487 487 487 41 324 3 335 14 14 411 297 297 297 297 297 297 297 293 293 497 497 43 43 364 364 276 346 346 428 428 428 146 358 76 449 472 397 397 333 333 220 220 164 142 221 401 321 321 321 354 425 425 431 374 374 374 374 374 132 132 203 203 53 473 340 340 116 466 22 283 455 399 217 70 473 65 65 350 350 413 413 33 33 394 478 338 338 338 395 470 480 480 480 85 299 299 339 64 212 465 384 430 430 430 430 430 465 449 152 152 152 152 349 164 164 214 214 214 214 360 328 328 200 243 233 192 192 419 229 491 312 312 491 187 187 187 201 201 201 201 201 201 201 201 491 201 491 201 201 435 211 211 408 149 321 321 321 219 152 152 152 152 143 458 144 389 389 389 325 34 255 399 217 217 65 486 486 486 460 460 240 310 107 395 242 116 116 219 219 152 152 378 378 347 347 347 236 239 161 397 133 276 109 189 139 139 175 81 176 135 200 464 464 340 116 33 33 250 217 70 70 70 65 306 306 306 306 396 134 215 35 29 100 497 497 497 58 72 72 72 72 437 481 481 481 481 481 481 182 182 182 375 375 375 185 269 342 86 221 336 144 430 430 430 430 430 430 430 430 430 131 449 449 485 152 477 477 374 132 132 13 229 491 247 312 15 15 15 15 193 193 193 17 +103-1241-0021 103 758 1 24 4 34 39 36 18 4 11 33 15 22 5 25 12 5 31 22 30 6 25 20 23 19 33 5 23 18 4 25 11 6 22 38 14 11 23 20 19 25 18 19 40 1 12 13 25 5 25 11 12 13 30 1 18 20 11 19 31 8 11 19 11 18 38 5 33 19 11 36 1 18 20 22 35 11 25 3 33 13 23 12 19 31 10 8 23 11 38 19 12 5 17 23 27 19 26 8 40 1 12 4 33 12 13 30 18 4 11 9 5 25 5 24 19 31 33 15 22 1 18 20 38 35 11 33 15 22 18 14 18 27 24 5 25 11 23 13 33 24 3 30 19 23 5 11 36 12 4 33 1 32 20 22 35 11 5 25 9 20 23 13 16 33 4 33 9 30 8 33 30 19 37 14 13 25 20 18 7 1 24 7 6 4 2 5 5 2 3 4 4 3 3 2 1 3 6 2 2 4 3 3 3 2 1 2 2 6 5 3 5 7 4 2 2 3 2 5 2 2 5 8 9 17 6 6 3 3 1 3 2 5 9 5 6 2 1 3 4 5 2 2 1 3 2 3 5 1 2 10 37 5 3 3 3 2 3 5 7 2 5 1 3 6 6 6 5 3 1 2 3 2 4 4 5 3 5 10 7 16 4 2 2 2 2 2 3 1 2 4 1 2 3 2 3 6 4 6 10 25 6 3 2 2 2 4 4 3 2 4 4 6 3 1 2 1 2 2 3 2 2 5 2 4 2 3 5 3 6 6 22 7 3 4 2 2 2 1 4 4 3 3 3 2 2 3 3 2 5 2 3 3 3 4 3 2 4 4 10 4 17 17 17 296 317 317 317 317 317 491 317 317 461 461 461 461 461 461 461 184 184 184 184 321 320 7 217 217 217 473 329 329 329 329 329 460 169 164 164 164 219 485 485 485 374 132 132 274 58 58 72 110 254 254 254 254 314 401 75 108 119 295 295 295 295 295 143 458 192 242 242 116 466 466 22 283 455 38 162 54 482 482 105 221 336 79 79 499 499 405 206 206 348 199 41 324 324 301 251 241 431 278 278 285 302 497 497 497 58 58 72 110 294 294 294 294 294 294 282 388 64 212 131 335 14 14 411 411 284 405 405 405 206 178 35 35 441 441 109 109 134 313 24 26 26 359 359 474 474 324 464 340 340 340 116 33 58 183 183 257 257 257 257 257 120 50 50 185 185 185 269 433 433 390 18 427 56 247 312 312 126 292 292 326 326 326 326 326 101 101 101 408 149 149 321 289 7 7 7 4 127 361 361 361 361 361 330 388 94 199 89 89 446 116 33 212 212 127 114 361 361 361 264 264 264 264 468 59 452 452 263 263 417 417 414 47 80 321 321 435 373 451 451 30 30 236 325 490 490 38 162 342 115 273 265 265 428 146 146 325 34 191 191 325 133 133 259 181 181 181 181 167 457 75 108 377 87 87 236 325 371 374 374 374 374 132 98 98 48 48 417 417 170 170 102 102 28 28 40 40 40 40 40 40 40 40 40 40 40 491 362 491 218 366 305 305 491 366 366 40 40 40 40 435 435 
435 435 435 373 451 451 30 30 30 422 458 144 27 389 389 389 389 196 196 479 331 307 307 61 61 167 167 457 75 108 119 351 351 139 139 139 293 293 293 216 216 114 258 258 31 54 54 238 238 221 321 310 107 395 395 437 91 91 91 85 85 85 85 450 293 293 122 122 131 133 333 333 220 220 198 22 44 236 129 321 208 208 425 386 241 431 84 496 496 88 88 176 176 176 328 200 200 464 106 265 265 265 265 265 85 85 85 207 318 318 39 433 433 160 427 247 247 126 126 126 326 326 326 326 326 326 326 101 101 149 149 228 321 320 127 45 45 45 45 35 198 114 0 0 222 58 58 110 254 254 254 314 401 321 354 137 137 137 94 44 44 44 217 473 65 258 31 31 342 68 68 68 238 6 272 470 470 470 171 171 171 358 358 358 233 321 192 192 419 419 439 439 78 78 78 491 28 491 28 28 28 2 491 491 2 341 341 341 12 12 12 21 21 21 408 408 149 228 321 321 373 451 451 30 30 30 378 378 389 389 389 389 129 259 108 119 295 295 295 295 295 143 458 192 156 156 156 245 245 58 72 72 350 350 350 350 350 350 413 203 381 53 89 89 322 67 466 241 431 443 167 167 457 196 217 65 329 329 42 42 147 147 380 256 139 175 175 423 423 423 423 236 75 371 371 374 374 132 132 216 127 114 92 92 92 92 92 167 385 243 227 419 439 78 78 170 170 170 47 491 491 2 2 491 2 2 491 491 2 491 316 435 435 316 316 435 321 435 373 338 338 400 400 400 30 422 143 458 144 389 389 389 389 314 242 242 394 76 76 259 420 420 420 301 26 251 241 431 443 443 169 169 402 402 6 272 415 415 385 129 401 321 259 190 380 499 499 428 428 146 146 457 457 147 147 380 288 288 173 173 29 29 495 495 406 467 467 365 330 94 475 475 324 324 58 58 72 110 268 315 315 315 268 450 450 98 98 229 247 15 15 193 17 +103-1241-0022 103 817 1 25 27 24 4 33 14 38 5 33 24 19 31 33 15 22 18 4 11 9 19 25 24 15 11 1 31 27 6 23 22 38 13 31 10 5 25 40 5 25 11 13 22 31 29 23 5 25 15 32 5 25 40 24 8 33 13 40 38 13 23 9 20 11 19 16 14 11 5 25 33 19 23 18 20 38 5 40 31 15 16 23 20 9 4 22 4 33 17 30 20 25 17 15 9 5 23 40 1 8 24 31 3 30 20 8 38 5 40 23 15 33 1 18 20 31 13 11 32 8 23 20 1 22 5 24 5 23 6 26 1 12 5 18 6 30 31 19 40 27 37 14 19 25 12 20 39 3 30 11 1 17 19 37 24 20 39 35 30 9 4 17 1 27 8 22 5 25 22 13 30 20 19 33 1 12 5 10 8 23 11 30 19 31 29 3 25 11 19 11 10 19 30 16 5 23 20 1 8 4 2 4 3 3 3 4 3 3 2 2 5 3 3 2 1 2 1 2 2 2 4 7 5 14 6 5 7 5 3 2 3 4 4 3 4 2 1 2 1 3 3 3 2 2 2 2 5 7 2 5 4 3 4 2 3 4 3 5 3 3 3 3 2 6 5 3 1 3 3 2 2 3 2 2 2 2 5 5 2 3 2 3 4 4 2 2 3 3 3 3 3 5 2 2 4 10 35 9 4 5 4 2 4 3 2 2 4 3 5 12 6 6 2 5 3 3 5 6 3 8 23 4 2 2 1 4 4 5 24 2 2 4 3 3 4 3 3 5 5 2 2 2 2 2 3 6 5 5 25 3 2 2 2 2 2 2 2 3 8 7 21 10 4 3 1 4 6 3 4 5 2 7 12 2 2 6 4 3 2 1 2 3 4 2 2 2 1 3 7 1 3 4 2 2 8 6 17 17 17 363 51 51 228 321 320 309 331 331 231 231 399 399 473 65 486 486 460 240 285 300 382 245 43 364 276 181 181 181 181 167 35 35 196 196 473 258 258 31 342 86 86 6 272 470 470 171 171 252 458 458 192 389 314 314 321 354 137 137 137 399 217 217 473 476 476 476 476 476 476 207 37 24 131 427 229 321 247 312 126 292 292 23 23 23 23 408 408 391 228 228 321 373 66 172 115 273 344 84 274 88 14 14 411 297 297 297 297 297 297 293 293 122 35 458 208 208 441 109 151 151 151 169 150 54 238 238 310 107 60 298 298 298 379 471 471 49 342 89 89 446 67 34 145 443 154 178 96 96 342 105 105 321 354 386 386 386 469 116 94 418 418 418 418 418 418 99 436 436 436 60 298 298 298 379 379 471 471 471 49 9 142 221 196 70 65 428 428 428 146 325 325 34 253 253 453 9 142 133 364 276 109 109 139 139 139 293 293 293 122 35 354 420 420 420 422 36 384 490 490 490 349 349 234 261 25 487 498 498 498 396 313 285 131 34 89 116 33 394 465 377 351 139 139 139 175 58 451 30 30 378 
43 141 141 141 31 162 232 68 68 115 273 470 171 171 252 173 402 402 26 359 166 166 301 8 354 354 180 376 376 460 178 178 458 192 415 415 314 472 221 458 208 79 288 288 360 360 434 200 248 248 212 445 445 171 171 171 171 252 215 354 100 100 302 375 375 185 185 269 390 390 18 112 427 56 56 312 312 312 312 292 292 292 292 292 12 12 12 12 12 12 12 12 260 260 260 260 163 163 163 163 163 163 491 316 316 491 316 316 73 289 321 320 287 287 287 111 111 111 85 438 203 53 394 478 162 232 68 115 273 106 499 499 306 396 337 337 464 464 464 111 111 378 88 345 141 281 31 342 26 251 251 241 431 403 171 171 171 358 358 233 321 227 227 419 419 439 439 225 225 47 47 47 491 47 80 80 80 289 451 451 451 30 30 422 162 232 172 115 179 179 120 120 314 457 310 310 338 338 395 499 499 265 265 85 146 146 37 359 359 474 474 474 19 454 454 417 414 170 170 170 47 28 28 2 2 2 491 491 2 2 2 2 491 316 491 316 435 435 289 435 321 144 27 351 319 319 53 255 255 255 251 241 431 235 235 235 235 413 413 98 48 13 13 13 170 321 170 312 187 187 292 292 292 292 23 23 23 23 23 101 101 149 149 228 289 321 321 127 5 5 455 72 72 441 153 153 153 372 396 313 186 54 54 224 50 356 281 281 9 168 106 410 410 410 410 410 173 402 29 495 406 467 340 340 340 466 22 283 455 448 219 219 219 180 180 306 306 306 306 306 306 59 37 37 404 439 439 439 78 78 170 170 28 28 28 491 2 2 491 2 2 2 491 491 2 316 491 316 316 316 73 73 289 321 321 445 445 278 278 173 196 196 429 429 429 219 464 222 222 245 245 245 8 354 180 376 376 376 376 376 282 37 37 233 192 419 419 439 78 170 170 442 442 187 442 187 187 12 12 12 12 260 260 260 149 149 289 289 321 289 209 287 16 16 16 16 16 88 88 111 111 111 111 438 143 35 389 389 389 33 394 76 465 445 445 445 351 351 264 264 264 468 468 468 337 337 324 324 464 277 277 277 385 36 227 419 439 78 78 170 491 47 187 47 47 47 442 442 442 442 442 127 22 5 236 36 36 107 395 351 91 91 91 91 206 206 122 122 35 29 456 456 31 162 9 105 336 74 106 426 426 206 348 64 212 191 191 191 314 401 321 108 107 107 395 485 286 286 286 468 245 349 349 155 262 262 359 359 474 474 474 474 19 454 229 321 247 15 15 15 193 193 17 +103-1241-0023 103 833 1 19 33 19 40 5 25 33 18 13 37 20 1 8 37 17 3 33 1 6 23 24 8 38 14 23 11 23 20 17 35 11 40 19 25 19 33 1 9 5 33 19 33 19 40 5 25 33 18 13 37 20 1 4 25 11 19 16 19 33 19 40 5 25 33 22 13 30 20 11 19 25 21 5 31 33 5 31 14 33 5 25 38 15 12 5 18 4 25 11 5 23 29 35 23 40 7 33 1 31 27 8 11 9 13 33 14 22 20 29 19 33 9 19 22 5 40 8 25 27 12 20 19 17 40 4 22 33 25 4 22 5 37 19 33 1 19 33 31 5 25 13 22 31 33 30 20 24 23 20 27 23 11 22 3 30 29 5 33 9 4 17 1 27 8 24 37 13 30 20 17 23 4 11 39 36 37 22 5 24 1 20 37 19 25 19 16 19 33 38 35 11 18 4 37 9 19 25 8 31 33 19 31 23 20 29 19 25 5 38 8 23 11 10 13 30 20 33 30 20 1 8 4 2 2 4 1 2 1 4 3 4 8 14 7 1 4 4 3 1 7 3 4 6 4 3 3 2 2 2 4 4 3 3 2 2 4 3 15 3 1 3 1 3 1 4 2 1 2 4 2 4 10 23 6 1 2 4 3 2 2 2 3 2 2 2 6 3 4 2 3 2 4 3 3 3 3 2 6 5 1 2 3 4 7 3 1 8 2 2 2 1 3 4 3 3 4 7 4 12 7 4 6 2 2 2 3 3 6 3 3 3 2 2 2 3 3 4 5 5 3 3 3 2 5 4 5 6 2 4 3 4 3 2 3 8 18 4 2 1 2 2 1 3 3 2 2 4 2 1 6 3 6 3 4 3 3 2 2 2 4 6 6 27 8 3 3 3 2 3 4 3 2 5 2 3 2 3 6 3 9 6 7 2 2 1 3 4 2 4 2 2 1 2 1 3 3 2 6 5 4 2 3 3 2 3 3 1 2 4 5 6 3 2 6 4 3 3 5 2 8 10 17 17 17 363 51 51 228 289 321 188 177 177 177 325 356 356 356 342 342 224 242 242 116 131 131 72 72 110 443 443 240 173 280 41 41 41 41 19 454 417 417 417 417 170 47 491 47 491 491 491 47 47 80 321 321 435 435 435 209 111 111 111 202 202 402 402 458 27 180 405 405 206 167 457 14 14 14 209 411 297 297 297 297 297 297 297 293 399 70 70 46 46 46 46 46 438 378 43 
364 109 109 498 498 134 387 122 122 26 26 359 81 166 324 416 239 458 144 180 484 278 240 314 77 270 342 224 340 340 340 94 199 277 277 277 277 227 419 229 247 247 126 126 326 326 101 101 149 391 80 80 80 80 289 321 354 159 159 159 325 34 177 177 325 356 356 356 31 342 224 242 242 379 131 131 72 72 110 443 443 443 173 173 280 41 41 41 41 19 19 454 454 454 78 170 170 491 312 312 292 292 292 292 292 21 21 21 21 21 21 408 408 149 149 228 228 289 321 209 209 83 55 55 322 322 94 199 118 118 118 118 118 205 177 177 177 177 325 356 356 356 342 342 242 242 116 64 131 472 221 144 445 445 351 351 264 486 468 468 468 337 337 337 324 252 325 34 89 340 116 33 394 212 465 395 395 151 151 169 150 86 86 6 272 34 44 38 162 68 172 115 273 498 498 498 396 240 35 242 242 242 116 250 250 364 364 109 109 403 403 403 207 171 3 252 216 198 22 5 455 72 72 72 72 294 294 294 294 330 64 64 212 302 302 302 497 122 129 259 74 441 441 424 424 497 497 497 49 342 168 180 180 113 113 113 113 450 167 167 131 427 321 247 126 126 326 326 326 101 408 408 149 391 491 321 373 66 68 68 115 273 84 16 88 88 111 111 111 111 438 438 438 35 259 354 180 443 443 285 300 382 313 313 143 458 458 445 445 213 213 213 252 215 354 277 277 277 277 143 259 259 354 420 420 143 458 144 351 494 253 368 453 168 106 111 111 111 438 438 10 10 479 331 84 84 496 274 216 198 448 448 448 464 154 154 154 416 32 96 368 453 453 115 470 470 486 486 376 460 460 178 35 96 96 401 196 196 309 309 479 331 486 486 460 460 178 458 192 192 69 223 130 280 277 277 277 277 385 385 75 227 419 439 78 170 47 47 47 491 491 491 2 2 491 491 316 316 316 73 289 321 321 209 177 177 177 356 356 342 168 44 116 199 154 154 96 96 54 482 238 161 161 487 288 360 360 360 339 53 359 166 166 166 324 14 14 411 411 424 424 424 424 424 424 122 122 122 131 472 221 144 27 437 306 306 306 306 396 215 35 29 277 277 314 401 321 259 354 180 376 376 376 376 120 282 37 233 192 419 427 78 170 491 312 312 312 341 341 341 341 341 12 12 12 12 21 21 326 326 326 326 101 101 149 149 228 289 321 321 209 287 16 16 16 88 88 111 319 319 203 53 394 212 4 104 104 104 104 406 337 337 337 324 422 143 458 208 386 431 376 376 376 460 240 24 36 107 152 152 152 202 402 402 402 259 144 27 27 351 319 319 319 319 203 381 381 117 48 417 417 417 417 197 491 435 80 289 321 209 188 357 357 357 357 357 173 280 242 116 94 118 118 118 118 118 280 177 177 177 177 457 457 364 345 389 389 389 285 34 202 202 202 402 401 259 354 137 137 137 137 33 10 10 479 331 265 265 428 146 146 146 39 86 6 272 87 87 87 162 54 86 26 26 444 444 213 252 215 354 340 340 340 199 44 44 44 43 43 364 276 346 346 265 85 85 85 139 139 293 122 122 314 401 401 75 107 107 395 351 351 264 264 468 468 406 337 337 324 422 36 36 161 161 487 487 487 41 41 19 19 19 454 417 417 421 421 491 421 128 128 128 193 193 17 +103-1241-0024 103 794 1 38 20 37 17 3 33 19 11 30 8 37 5 23 6 26 29 20 31 18 4 37 5 25 33 38 20 1 24 19 31 19 40 31 29 13 25 31 14 31 13 11 19 33 38 5 40 15 33 24 8 23 40 1 8 24 17 23 4 11 9 19 22 5 40 8 23 5 37 11 30 8 37 19 26 1 27 19 33 31 20 24 40 31 27 38 5 25 11 14 16 5 23 12 5 33 8 24 17 27 19 26 33 5 23 19 37 38 19 34 39 36 5 25 11 9 19 23 6 26 33 19 39 36 1 8 37 25 13 37 14 9 19 23 6 26 11 33 36 13 25 20 9 5 11 20 1 25 3 33 30 20 23 20 1 9 5 33 12 20 5 31 8 23 5 24 38 5 40 12 5 38 14 31 33 1 8 37 27 25 23 20 9 19 25 19 25 19 33 16 6 30 24 5 25 34 31 1 9 5 33 12 4 33 38 5 40 19 25 5 16 1 13 5 2 2 2 3 4 1 2 3 4 3 2 2 4 4 5 5 4 3 3 2 2 2 2 2 8 19 3 2 3 2 2 2 3 3 3 3 3 5 2 2 2 2 2 1 4 7 3 3 8 3 5 16 7 3 3 3 5 2 3 2 3 2 3 4 6 3 4 2 3 6 1 2 9 13 11 
3 4 5 3 3 2 5 7 4 2 6 2 2 4 2 2 1 2 1 3 3 3 1 3 1 3 2 6 2 3 2 1 3 1 3 2 1 2 1 2 6 4 4 4 2 5 5 16 6 3 2 1 3 2 2 2 3 4 2 2 3 3 2 2 3 2 2 3 8 10 4 2 4 3 5 4 9 16 3 3 2 2 2 3 4 6 2 2 2 2 1 4 2 2 5 6 5 5 22 7 4 2 2 2 3 3 2 2 3 2 3 2 5 3 3 4 4 3 3 4 10 2 2 2 3 3 3 1 2 4 2 4 6 11 10 17 17 17 296 317 491 317 317 491 184 184 184 184 320 7 345 152 152 152 152 402 221 144 180 189 405 206 167 36 377 87 87 236 161 79 499 499 499 428 146 173 173 280 29 255 251 251 241 235 235 235 235 235 348 248 76 259 74 74 351 213 213 213 213 213 186 39 342 342 224 110 110 110 202 202 202 430 430 430 430 430 243 133 259 345 109 41 41 19 19 454 229 82 229 312 312 126 292 292 292 292 292 292 21 21 21 21 408 408 408 149 149 321 321 320 473 258 258 31 342 224 494 494 31 162 232 105 105 336 470 470 432 330 330 379 77 342 224 300 300 382 186 186 54 172 273 470 470 120 240 325 177 177 177 378 345 141 141 281 342 168 470 411 171 171 171 171 252 314 401 196 196 217 70 65 265 265 265 85 85 85 139 139 375 375 185 269 433 427 427 247 247 126 126 126 326 326 23 23 23 23 23 101 149 149 149 321 321 287 111 111 111 438 356 203 64 90 212 144 208 386 431 376 376 376 376 85 37 24 35 259 354 420 420 422 143 144 27 351 368 453 168 106 111 111 111 438 438 251 251 251 241 266 266 266 266 173 402 402 221 75 161 161 79 499 499 428 85 146 146 173 173 176 176 176 328 200 200 117 404 404 439 439 225 237 237 260 260 260 260 260 391 391 289 321 321 321 209 287 287 16 16 16 16 16 88 88 177 177 177 177 35 478 478 68 172 172 444 444 444 360 339 339 394 478 478 232 232 68 172 344 344 344 344 344 274 43 43 43 364 364 276 174 319 319 348 348 348 64 64 212 212 300 469 134 349 155 262 262 100 100 497 122 45 45 45 325 111 111 111 203 53 90 212 144 106 88 319 135 135 248 465 377 87 87 87 251 251 251 251 241 278 278 278 173 402 402 345 333 220 220 164 219 477 477 477 88 89 89 446 53 212 354 354 255 251 251 251 251 241 431 235 235 235 235 235 348 248 248 465 449 377 123 123 123 219 219 477 477 477 477 477 132 13 321 247 312 126 126 326 326 326 326 326 326 101 101 149 149 228 321 412 287 111 111 111 438 202 402 6 479 463 463 463 280 29 382 245 8 354 354 134 497 251 241 431 235 235 235 235 348 76 465 108 123 123 123 88 109 475 475 94 475 475 475 301 8 354 106 493 493 240 325 41 41 41 19 454 454 229 491 247 312 126 126 23 408 149 149 228 321 320 7 331 307 307 307 167 457 457 42 147 380 485 213 213 286 286 139 139 175 359 474 474 41 41 19 19 454 454 13 414 170 47 47 47 491 47 491 491 47 491 102 435 80 80 289 321 7 7 354 159 159 159 314 35 22 448 448 464 464 255 38 162 68 115 273 106 265 265 85 85 146 175 81 242 203 250 250 345 141 141 281 453 9 198 22 283 455 43 364 364 276 109 109 498 498 498 396 271 271 39 39 86 86 238 6 227 419 439 78 56 56 28 491 28 491 2 491 2 341 341 12 12 21 21 23 101 101 101 149 149 228 321 287 287 111 111 202 202 202 280 29 106 350 350 350 175 466 166 166 166 301 8 137 137 137 137 94 199 340 340 340 94 199 277 277 385 457 393 205 155 155 332 148 148 148 372 372 245 399 399 217 70 65 319 319 319 319 379 379 243 77 270 433 433 112 427 247 247 126 126 23 408 408 391 228 321 320 320 159 159 159 159 129 259 127 114 92 92 92 92 457 457 141 141 141 281 342 168 168 340 340 116 10 479 331 331 230 230 230 169 169 169 352 352 352 352 352 352 112 112 78 56 421 421 491 15 15 15 193 193 193 17 +103-1241-0025 103 241 1 19 33 31 38 14 31 12 5 25 1 13 25 20 34 19 26 39 36 22 35 11 19 24 4 21 5 25 1 24 19 31 19 40 31 29 13 25 31 14 31 13 11 19 33 38 5 40 38 19 22 5 11 5 37 24 20 33 19 33 6 22 23 8 22 12 4 33 1 13 4 3 3 4 4 3 3 2 3 3 5 2 4 3 2 3 2 3 2 1 2 
3 6 6 5 3 7 19 4 2 3 2 2 4 3 1 5 2 3 4 1 2 1 2 2 1 4 3 2 3 2 2 1 3 2 2 3 3 5 5 4 3 3 4 3 5 3 9 17 17 17 363 363 363 51 51 51 228 491 321 321 209 177 177 177 177 356 77 342 142 397 336 345 109 498 498 498 313 186 39 342 68 198 114 114 242 446 116 457 335 401 321 226 321 209 475 475 475 475 475 475 475 475 422 349 164 214 214 214 214 200 248 219 152 152 152 143 458 192 389 389 34 121 121 399 217 217 217 217 473 65 486 486 486 460 460 460 24 310 107 107 242 275 275 275 303 303 117 404 13 78 170 170 491 491 312 187 187 187 187 12 12 12 12 12 408 408 149 149 228 321 320 7 473 258 258 258 31 342 224 494 494 31 162 232 232 105 105 336 470 432 432 330 379 64 77 77 224 300 300 382 186 186 54 273 470 470 240 34 177 177 378 345 141 141 281 9 142 397 364 364 109 109 278 143 458 192 192 469 325 34 223 130 402 196 70 429 429 429 422 108 377 87 87 236 259 108 119 119 437 405 405 405 405 206 178 35 321 26 386 266 266 266 266 266 178 458 96 321 127 114 92 92 92 92 92 167 385 427 82 247 126 126 326 326 326 193 193 193 +103-1241-0026 103 550 1 12 15 1 38 14 17 35 11 1 39 36 25 27 1 12 20 5 31 8 23 5 24 29 20 29 5 23 1 9 5 33 12 13 30 19 40 31 27 23 19 33 5 23 31 22 27 29 16 14 12 20 19 24 4 21 5 25 15 32 5 25 19 25 5 25 5 31 8 23 5 24 1 27 25 23 20 21 5 31 33 19 25 12 20 5 12 14 6 30 16 5 25 40 1 19 33 38 5 40 29 30 19 33 20 19 25 33 30 19 31 33 19 26 33 5 19 24 4 21 5 25 34 19 26 40 5 9 7 33 12 13 24 1 12 2 4 2 2 2 4 7 6 12 4 3 3 8 10 4 1 3 3 6 2 3 2 2 5 3 2 8 27 2 2 2 2 2 2 3 1 6 4 3 2 1 2 3 7 3 4 2 4 2 2 3 3 4 3 5 2 6 6 7 1 2 4 1 2 3 3 7 5 5 2 6 28 6 3 2 3 4 2 4 2 2 2 2 3 2 3 3 6 3 6 2 5 12 40 4 2 3 4 3 3 2 1 2 3 3 2 2 1 2 2 3 2 2 4 2 1 4 4 4 1 3 2 2 3 4 1 2 6 3 2 6 7 5 17 17 17 296 317 491 184 184 184 184 289 321 320 127 0 0 0 0 378 354 347 347 347 245 416 129 321 144 180 484 484 484 484 120 37 37 37 24 24 404 414 414 414 47 47 47 47 491 47 80 80 321 321 289 7 219 152 152 152 116 94 331 84 84 84 84 16 274 98 229 247 247 126 326 326 326 326 101 149 228 321 321 320 22 448 464 255 38 162 342 115 273 106 265 265 85 85 146 175 175 81 242 203 394 76 259 74 485 213 213 213 252 215 259 354 100 100 100 497 98 98 98 13 417 417 170 170 170 170 28 491 28 2 2 2 2 2 2 2 2 2 2 2 2 491 316 316 316 73 289 321 289 321 159 159 159 159 35 127 114 0 222 406 467 356 356 281 162 232 232 68 172 115 344 344 344 344 274 274 251 241 431 278 285 285 302 497 497 186 162 162 232 482 482 482 105 336 144 180 496 496 274 215 457 96 393 155 155 332 332 216 216 448 448 448 464 121 121 399 217 217 65 65 486 460 240 240 310 449 107 242 242 116 33 10 10 10 309 331 418 418 418 418 418 252 99 99 436 436 60 60 298 298 116 199 199 340 340 116 94 199 242 466 94 199 459 44 38 31 162 68 68 115 273 273 265 265 85 85 146 146 175 175 81 81 275 203 203 381 381 48 13 229 321 247 312 126 292 292 292 292 292 292 21 21 23 23 23 23 23 23 260 260 260 260 260 391 391 228 321 321 412 287 287 350 350 350 350 350 250 81 166 166 166 422 36 310 395 395 151 151 150 39 86 238 272 34 340 340 116 466 22 448 448 464 464 493 493 493 300 300 382 245 14 14 411 411 153 153 372 372 372 396 349 349 234 234 25 242 275 275 379 379 471 471 49 269 433 390 390 112 112 56 56 56 305 170 28 28 28 491 28 491 28 362 491 362 491 362 362 362 362 40 40 362 362 362 305 362 362 491 218 218 40 40 40 40 435 435 211 21 326 326 408 408 408 149 228 321 177 177 177 177 378 364 345 141 141 141 141 281 453 9 142 336 74 190 487 104 278 325 34 324 324 464 464 121 121 121 64 161 161 487 469 186 54 86 6 272 176 176 328 200 248 76 465 377 87 123 255 255 399 217 473 65 486 486 460 460 240 310 449 242 
242 116 394 76 465 214 214 214 328 200 248 49 453 342 168 255 8 354 180 113 113 113 113 167 167 35 198 198 114 114 114 57 57 120 282 203 381 381 381 117 48 229 321 247 193 193 17 +103-1241-0027 103 743 1 18 36 18 4 11 9 19 25 31 33 27 23 5 25 5 38 15 16 14 24 18 14 29 13 30 5 25 33 31 19 25 18 14 19 25 16 5 25 31 20 9 8 5 22 30 36 23 25 14 31 18 36 11 8 11 9 19 16 6 30 32 20 22 35 11 22 5 25 16 13 31 1 8 39 36 40 11 33 5 23 8 5 38 15 22 4 33 25 8 33 31 5 25 11 19 24 4 21 5 25 34 19 26 40 23 8 22 12 4 33 1 9 19 22 5 40 8 11 19 11 5 25 18 4 37 33 8 24 19 25 12 5 11 15 1 8 17 13 31 12 4 33 31 38 8 5 24 31 27 34 19 25 1 8 4 24 11 30 13 11 16 5 23 34 19 25 15 25 33 8 1 12 13 30 19 40 5 25 5 29 19 22 3 25 24 8 9 27 25 40 1 5 5 3 1 2 2 2 1 4 4 4 3 4 1 2 2 5 3 4 2 2 3 2 8 3 2 2 2 2 2 2 2 2 6 4 3 3 2 3 4 3 4 6 2 8 2 5 5 5 6 5 3 3 4 8 2 2 3 3 2 1 5 2 3 1 2 3 1 2 6 5 10 30 7 4 2 4 2 1 3 3 5 4 3 3 3 2 3 2 5 3 2 1 2 1 2 4 4 4 2 3 3 1 5 2 3 5 3 2 7 4 17 2 3 2 3 3 2 3 2 2 1 2 4 1 2 6 5 3 1 2 2 2 2 11 27 6 4 3 3 3 1 2 3 2 5 2 2 5 4 4 4 7 17 8 4 3 2 2 2 3 2 1 2 4 3 2 6 2 2 10 34 3 2 1 3 2 2 1 3 8 4 3 2 2 3 6 4 8 4 8 6 17 17 363 363 51 228 373 489 489 489 489 88 88 254 254 254 314 8 354 137 137 137 33 394 478 478 482 482 482 6 272 371 189 189 424 424 497 122 34 34 242 116 285 199 255 43 43 109 109 403 403 171 301 349 205 155 165 165 165 53 58 156 156 156 156 245 129 129 321 74 74 351 351 351 264 264 468 468 406 11 11 379 379 77 77 342 224 340 340 94 199 156 156 156 245 14 14 411 411 188 121 121 121 53 394 76 205 261 25 469 11 379 379 77 342 342 224 41 41 41 301 143 259 354 62 62 62 62 464 464 44 44 44 129 321 458 208 208 190 190 441 487 487 153 424 424 182 182 497 497 497 497 122 10 10 479 331 498 498 498 498 498 396 271 186 39 323 323 142 489 489 489 489 422 32 239 321 384 371 180 265 265 265 265 85 85 146 24 35 259 354 255 255 349 155 155 148 148 148 387 186 99 400 400 400 30 143 458 144 389 389 314 90 458 144 121 121 203 394 76 4 205 261 25 470 443 443 443 169 271 150 39 433 433 433 160 112 427 56 247 312 312 312 292 292 292 292 292 292 292 292 21 21 21 21 21 21 21 23 23 260 260 260 260 260 260 391 149 228 321 321 412 287 287 111 111 438 219 219 219 485 485 374 186 162 54 86 238 272 272 494 139 175 251 241 431 265 265 85 146 146 464 464 255 43 43 109 109 403 171 171 143 192 192 469 314 314 196 196 479 331 428 428 428 428 146 385 35 75 342 224 89 446 94 199 255 255 217 217 473 65 486 486 460 460 368 310 449 60 242 116 116 394 76 259 214 214 214 214 200 200 471 49 453 26 26 241 266 266 266 266 266 266 416 96 198 198 114 92 92 92 92 92 92 167 385 233 131 229 247 247 126 126 326 326 326 326 326 408 408 149 149 228 491 289 321 321 354 420 420 143 458 192 485 494 368 342 168 111 111 111 240 325 371 371 278 278 116 33 33 58 72 110 110 202 202 202 402 402 36 119 119 103 103 103 103 85 299 299 203 53 473 340 340 466 22 283 455 236 384 371 93 93 93 93 207 207 207 19 454 263 417 417 417 417 417 170 170 28 491 28 491 491 2 491 2 491 2 2 2 163 316 491 435 435 435 435 321 321 321 435 287 111 111 438 438 458 445 357 357 443 271 31 342 342 198 114 92 92 169 77 342 142 397 345 346 181 428 438 464 464 365 330 203 394 478 172 115 273 344 344 344 274 349 164 164 164 470 278 278 120 330 388 195 195 117 48 417 417 417 170 47 47 47 47 47 47 491 491 47 491 80 80 80 321 435 435 287 287 111 111 111 438 464 365 365 365 330 203 53 64 212 161 79 288 151 240 314 131 393 262 262 100 497 497 349 164 224 470 432 365 330 94 199 331 145 290 290 434 434 339 212 131 180 284 265 265 85 85 207 207 454 454 229 321 247 312 312 126 292 292 292 
292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 101 101 149 149 228 321 321 320 127 0 0 222 468 356 356 356 453 342 242 116 199 44 44 44 129 35 401 401 401 321 74 351 351 278 278 178 458 192 180 125 125 125 348 250 70 46 46 46 46 46 438 301 8 239 354 106 106 84 496 496 496 496 413 413 413 471 471 49 269 433 390 160 112 56 417 201 201 201 201 193 193 17 +103-1241-0028 103 777 1 8 11 36 23 5 37 33 36 19 24 4 21 5 25 8 24 25 8 31 5 25 11 29 23 5 24 29 38 19 34 11 19 24 29 5 23 40 19 25 24 8 13 23 9 27 40 1 38 19 12 19 31 1 24 4 34 39 36 40 22 5 24 29 4 25 39 5 25 31 33 3 29 33 6 22 19 26 1 29 3 30 33 23 20 1 9 19 22 5 40 32 20 38 5 40 7 33 5 37 9 30 13 34 5 25 11 29 3 30 33 23 20 9 19 22 5 40 12 15 18 4 11 30 20 10 33 12 5 9 5 17 20 1 25 3 33 5 25 5 12 14 38 14 11 19 11 32 20 31 15 5 25 33 19 23 12 15 18 4 11 23 13 16 33 12 5 37 19 23 19 21 5 25 11 38 14 11 30 8 37 19 26 11 7 25 5 31 33 20 29 23 19 33 5 23 18 19 23 1 10 8 5 6 3 4 3 4 2 2 4 5 5 1 2 5 3 4 7 7 1 2 2 6 3 2 5 3 1 2 4 3 2 3 3 2 3 3 2 1 3 6 3 3 3 5 14 53 3 4 6 6 16 3 3 5 3 2 2 3 3 1 3 5 2 4 2 3 2 4 3 4 2 5 5 4 2 7 8 3 3 4 2 3 8 14 3 2 3 2 2 2 2 2 2 3 4 3 1 4 2 2 6 9 2 1 3 4 4 3 3 2 3 3 3 3 4 3 3 4 2 1 4 2 3 6 1 2 2 3 4 2 9 21 3 2 3 3 2 3 3 3 7 5 4 1 4 4 2 6 6 3 3 3 3 2 2 3 2 2 2 3 3 4 3 2 1 5 2 4 3 6 1 2 2 2 2 3 2 6 3 2 4 3 5 3 3 5 4 5 4 2 2 2 2 2 5 3 9 8 17 17 17 296 317 317 184 184 184 289 209 287 287 111 111 111 438 438 314 32 239 384 371 371 374 374 132 274 251 251 241 431 266 266 266 266 173 402 402 36 108 87 87 88 88 255 255 399 217 217 65 65 486 486 460 460 240 310 107 395 242 275 116 199 199 111 111 85 438 203 203 53 10 10 309 331 331 265 265 428 428 146 146 186 39 342 68 68 224 224 11 116 33 394 472 401 401 321 74 425 425 386 431 319 319 319 203 53 53 53 76 401 259 345 333 333 220 220 402 472 221 239 384 371 278 278 53 53 394 76 259 74 302 302 497 497 49 453 342 168 340 340 116 250 70 46 46 46 46 438 464 464 145 139 139 293 293 122 8 354 354 84 84 496 496 274 185 39 433 433 390 160 112 112 56 56 56 56 28 28 491 491 28 28 491 491 362 491 362 362 362 491 362 362 362 362 362 362 491 362 362 211 211 362 491 369 369 369 369 369 369 369 369 21 21 21 21 21 21 21 260 260 260 260 260 260 391 391 391 491 289 321 321 7 7 345 333 333 220 220 314 32 4 4 127 114 258 258 258 258 258 31 39 342 433 390 390 390 160 160 160 160 97 97 225 225 80 80 80 321 321 7 217 473 329 329 329 329 329 164 164 485 485 485 485 374 368 31 142 221 336 27 121 399 53 76 465 74 351 351 365 365 365 330 388 64 219 398 398 275 275 116 471 471 478 66 482 238 6 272 106 405 405 405 167 215 96 96 75 108 119 437 405 405 405 405 206 206 178 458 192 176 176 328 328 200 303 48 48 417 225 80 80 491 321 80 289 289 320 74 437 437 306 306 396 396 396 35 35 26 359 359 474 474 474 474 19 19 454 229 321 247 126 126 326 326 326 326 101 101 408 391 228 321 289 321 320 354 420 422 143 144 27 494 278 186 99 400 400 400 378 378 141 141 141 281 168 106 113 113 113 206 240 285 34 462 462 402 401 259 259 354 380 380 288 443 120 120 169 169 169 169 352 352 352 352 352 352 97 89 55 322 67 90 90 259 74 74 437 437 306 306 306 396 396 385 35 26 359 359 474 474 324 301 301 354 420 420 420 143 321 144 27 351 253 253 368 453 342 198 198 127 0 0 0 0 58 110 254 254 254 131 133 147 147 288 213 213 143 233 310 107 447 447 447 198 198 22 283 455 8 354 354 329 151 151 416 416 41 41 41 41 19 19 454 454 170 170 491 491 312 491 312 312 292 292 292 326 326 326 326 326 101 101 101 149 149 228 321 320 479 331 307 307 61 285 34 44 44 44 94 199 493 493 493 493 216 300 300 382 245 43 364 364 276 
109 498 498 498 498 396 313 313 314 36 430 430 430 430 36 36 310 107 400 400 400 30 422 162 162 68 68 115 273 470 403 403 207 207 464 464 89 319 348 64 76 465 108 139 139 139 139 497 122 216 0 0 0 0 58 254 254 254 314 26 251 241 431 443 443 443 169 402 402 96 75 472 198 198 22 283 455 4 4 4 280 278 278 278 175 175 81 459 469 37 37 24 310 107 395 89 89 322 322 250 250 347 347 347 236 129 161 79 79 499 499 265 85 85 146 146 173 173 176 176 135 135 200 248 248 384 371 180 315 315 315 450 450 413 94 199 44 44 38 342 342 68 482 6 336 384 371 213 213 213 213 252 215 129 321 26 26 26 81 278 278 285 26 302 497 497 58 58 183 72 351 278 278 278 139 139 375 375 375 375 98 98 13 229 321 247 15 15 193 193 193 17 +103-1241-0029 103 765 1 12 5 30 27 11 29 3 30 33 5 37 38 19 10 18 4 11 9 19 25 22 5 33 31 27 11 20 29 23 20 19 25 33 5 12 5 31 6 16 33 31 28 23 1 12 5 33 12 5 9 4 26 22 31 1 16 30 19 25 21 11 38 19 12 9 23 36 24 19 26 38 8 23 11 10 13 30 20 33 30 20 40 5 25 11 31 23 19 24 38 8 33 9 14 10 19 40 1 38 14 31 13 37 30 5 23 16 20 33 5 9 5 37 12 13 30 18 13 11 40 1 12 5 10 8 23 11 29 35 33 7 33 18 14 18 4 25 11 5 25 11 9 30 27 22 6 16 5 9 30 4 25 10 5 37 38 8 23 11 29 23 5 24 12 5 33 9 30 5 32 33 5 17 13 25 31 33 12 5 31 8 11 5 37 12 5 9 5 17 20 1 10 3 3 4 7 2 3 4 3 2 2 5 2 4 4 1 2 1 2 2 4 4 4 4 6 6 4 5 3 2 3 2 2 3 2 2 3 6 6 4 3 7 8 7 12 3 2 1 3 2 5 5 6 3 9 2 2 3 2 2 4 2 2 2 3 4 3 4 3 3 7 3 7 3 3 7 3 3 4 5 2 5 3 2 3 2 6 2 4 7 3 4 4 6 6 5 5 7 13 3 2 7 3 3 2 2 3 6 4 2 2 3 4 3 3 3 2 4 7 5 9 38 2 3 6 9 4 3 4 2 3 5 3 3 3 5 6 4 1 2 1 3 2 3 3 3 5 3 2 6 2 5 3 5 2 4 3 7 2 5 6 3 4 8 2 3 3 4 2 3 6 2 1 3 3 3 2 1 2 2 4 5 3 1 2 2 2 3 5 3 8 15 17 17 17 363 363 363 51 51 228 321 321 320 5 5 455 42 42 147 380 380 496 496 496 274 274 122 24 131 472 221 259 74 437 306 306 306 306 169 167 36 449 69 223 130 130 402 402 345 345 109 407 407 407 385 36 75 310 395 254 254 254 314 259 354 137 137 137 33 394 394 465 144 27 351 189 189 151 167 167 457 478 478 66 68 68 115 344 344 344 344 344 344 274 236 32 239 384 371 213 213 213 213 215 129 321 354 359 359 474 474 464 340 340 116 394 465 377 377 123 123 198 22 283 455 38 162 232 482 172 115 273 106 405 405 405 405 206 169 402 402 402 6 272 472 472 482 482 482 115 273 106 153 153 387 387 387 139 139 302 302 375 375 98 13 321 247 247 126 126 326 326 326 326 101 149 149 228 321 321 127 45 45 45 35 259 127 5 5 455 129 129 259 354 354 180 486 365 365 365 365 360 200 200 243 243 233 270 270 270 390 390 390 390 97 97 225 225 225 373 155 261 487 288 288 278 330 339 64 310 447 447 238 272 397 345 333 220 220 402 221 129 401 321 354 425 425 241 431 374 374 374 203 53 53 473 176 176 135 328 200 200 248 248 248 364 276 276 346 346 265 428 85 85 139 293 293 293 122 122 401 401 401 310 310 107 107 395 351 470 264 264 468 468 468 337 337 324 422 36 36 161 161 487 487 288 41 41 246 318 49 453 342 168 89 116 116 33 394 394 478 66 68 68 68 26 26 251 241 81 278 278 278 203 53 250 250 250 250 250 276 346 346 428 428 428 146 252 36 472 221 401 401 321 321 354 354 498 498 498 498 396 143 36 310 107 107 395 50 50 50 50 185 185 433 433 160 112 427 247 247 126 126 292 326 326 101 101 149 149 228 289 321 320 347 347 347 186 162 232 232 68 172 115 273 204 204 204 204 280 29 495 134 302 302 497 497 349 349 234 234 261 25 213 213 213 252 252 449 34 255 255 8 259 354 180 230 230 319 173 402 402 198 127 222 222 222 222 222 313 58 72 72 110 110 120 120 120 120 120 37 37 24 471 270 270 433 433 160 18 112 112 56 56 491 28 28 28 491 28 491 28 491 362 362 491 362 362 362 491 491 362 491 491 211 211 102 491 369 
369 369 369 21 21 21 21 21 101 101 149 149 321 321 320 127 5 5 236 129 36 310 107 395 351 91 91 91 91 85 85 85 85 139 293 293 122 122 131 472 221 401 321 74 441 189 189 240 285 34 180 113 113 113 113 167 285 449 449 156 156 156 313 58 58 72 72 294 294 294 294 294 294 294 282 388 64 64 212 34 89 89 322 53 212 32 259 354 380 380 189 496 496 274 143 458 192 180 230 230 230 169 169 352 29 44 44 245 8 32 321 354 190 380 288 365 365 365 365 330 388 64 76 76 310 107 107 395 462 462 462 402 402 133 276 276 346 346 486 315 315 315 139 450 293 122 122 131 472 221 401 321 75 74 425 425 386 386 386 431 319 319 319 203 203 381 381 381 381 381 117 198 198 127 45 45 45 236 401 401 401 321 354 190 380 499 151 151 169 169 99 447 447 238 6 272 34 255 416 192 180 432 432 330 379 77 77 342 342 198 22 283 283 38 162 342 115 273 265 265 265 85 146 146 325 34 69 130 130 198 22 283 455 8 259 354 106 151 151 151 416 416 192 41 41 41 41 19 19 454 454 229 321 247 312 15 15 15 15 15 15 15 193 193 193 193 17 +103-1241-0030 103 773 1 19 40 5 25 12 4 33 1 9 39 36 33 5 16 5 23 1 18 38 5 33 11 19 11 12 4 33 30 20 23 20 25 19 26 7 33 16 14 24 12 5 9 4 26 22 6 23 38 8 33 5 25 11 23 15 31 20 24 15 22 39 36 34 19 26 22 5 37 32 20 4 31 22 33 1 38 13 23 1 25 7 1 8 11 5 25 27 1 31 13 11 24 4 34 39 36 1 18 38 8 5 9 30 8 11 5 37 22 6 30 31 1 5 9 30 8 11 1 6 23 19 25 38 8 33 38 19 12 5 23 5 37 23 20 24 19 31 33 20 37 15 23 1 16 6 4 1 4 2 5 2 2 2 5 3 2 2 6 2 10 37 3 1 2 2 2 2 1 3 4 9 2 8 4 5 2 4 5 4 2 3 2 2 2 2 5 4 4 4 8 7 5 4 5 2 1 2 4 5 7 4 3 3 3 2 3 3 2 5 4 5 6 5 4 6 5 2 4 34 7 2 5 1 2 16 23 11 1 2 4 17 5 7 2 3 3 5 3 3 5 37 5 3 7 3 6 3 8 3 2 5 7 3 5 9 16 5 7 4 11 5 2 10 5 3 6 5 7 5 2 1 2 3 9 4 7 3 5 8 3 7 4 3 5 9 10 7 17 17 17 296 363 363 363 363 51 51 51 184 184 321 184 321 321 209 188 430 430 430 430 342 430 430 430 430 33 64 212 127 114 92 92 92 92 167 457 401 401 401 321 354 354 219 219 485 485 485 374 374 285 285 469 469 349 393 234 155 262 262 100 100 100 100 375 98 98 98 13 13 13 442 442 442 491 442 442 442 442 442 491 102 2 2 2 2 201 40 305 305 305 305 305 40 366 366 102 316 316 316 491 102 491 305 102 102 289 289 321 321 320 181 181 181 181 181 35 449 430 430 430 430 198 114 114 92 92 92 167 457 35 401 75 161 161 161 161 487 487 487 213 213 246 246 246 246 301 26 251 251 241 444 444 444 444 360 360 339 199 176 135 135 135 135 200 200 464 113 113 113 113 167 167 349 155 155 165 165 165 165 466 22 22 283 455 455 259 354 354 180 376 376 365 365 365 328 200 243 76 458 192 483 14 411 411 297 297 297 297 297 297 293 293 497 497 43 364 364 276 346 346 346 428 428 428 146 252 143 36 449 89 89 446 446 33 251 251 251 241 241 431 171 171 171 252 186 39 342 342 342 224 41 41 324 324 301 399 473 476 476 476 476 143 458 192 219 152 152 422 349 164 164 164 214 214 360 360 200 200 248 321 144 192 69 223 223 223 223 223 37 173 352 352 352 402 99 338 400 400 400 400 464 464 464 145 376 376 376 460 169 169 342 342 86 105 6 96 96 272 427 56 247 247 312 126 126 292 292 292 292 292 23 23 23 23 23 260 260 260 260 260 260 391 391 163 491 316 316 316 316 316 316 316 73 289 7 7 7 364 276 276 109 109 139 139 293 293 293 413 309 479 331 331 315 315 315 315 315 450 450 16 98 98 98 13 13 13 229 247 312 312 126 126 126 23 23 23 23 23 260 260 391 391 47 491 491 316 316 80 321 373 412 412 287 287 287 284 306 306 85 438 240 325 34 242 242 94 199 331 84 84 84 16 16 16 16 98 98 98 98 263 13 225 225 225 225 225 225 80 80 80 321 373 66 66 172 179 179 179 179 179 314 196 196 473 65 329 329 329 329 329 164 164 485 485 485 485 485 374 132 98 417 417 417 417 417 
417 170 170 170 28 28 28 491 28 491 491 2 491 491 2 491 2 435 2 2 2 366 321 305 305 40 435 40 201 435 435 435 435 435 289 289 321 320 364 276 346 346 265 428 85 146 464 464 44 44 44 8 401 401 321 354 190 190 380 380 499 265 265 265 85 85 146 146 252 325 449 34 255 130 130 402 402 401 321 321 208 208 441 441 441 153 153 153 372 372 396 396 271 186 54 433 433 390 112 427 56 247 247 126 126 326 326 326 326 326 326 326 101 101 101 149 149 228 321 412 44 44 44 8 129 129 259 190 190 190 190 79 380 380 499 499 265 265 265 85 85 85 146 146 146 24 131 335 14 14 226 321 209 411 287 297 297 297 297 297 297 297 293 293 175 175 81 81 340 340 116 33 33 90 250 250 364 364 276 346 346 346 428 428 428 146 358 358 36 131 472 397 345 333 333 220 220 216 44 44 44 251 251 251 251 251 251 241 241 431 266 266 266 266 173 173 173 402 402 402 26 359 359 81 81 324 324 324 301 339 399 217 217 217 217 217 473 65 278 278 31 162 342 86 86 86 6 6 272 41 324 324 324 301 4 4 4 280 470 470 403 171 171 171 171 464 139 139 302 375 375 375 98 263 13 78 78 170 491 421 15 15 193 193 17 +103-1241-0031 103 654 1 8 37 25 13 37 14 31 20 25 38 5 25 1 9 5 33 8 22 5 25 19 24 4 21 5 25 38 5 33 32 20 38 35 11 23 35 22 23 8 22 1 8 11 27 25 33 13 37 14 19 22 31 29 13 22 33 19 9 20 5 9 30 8 11 24 8 31 13 23 16 1 8 24 31 27 18 27 24 23 20 25 27 9 5 11 20 38 5 23 13 37 14 38 6 25 33 5 24 13 30 20 1 24 20 1 5 25 23 13 31 19 33 24 8 33 9 20 5 16 6 30 5 25 24 19 32 5 25 13 30 20 1 8 31 5 29 27 40 5 16 6 30 5 25 24 19 32 5 25 13 30 20 24 8 33 5 25 33 9 20 37 13 30 20 29 14 33 19 22 39 5 23 14 1 11 8 1 3 2 3 3 6 4 4 2 3 6 13 2 1 3 4 5 2 1 3 5 5 5 2 2 2 2 2 4 2 3 1 2 5 2 5 2 7 6 24 7 2 3 2 2 2 5 3 2 3 3 2 3 2 3 2 3 4 2 4 3 4 3 2 5 7 3 6 5 14 8 3 5 3 5 5 3 3 4 3 4 2 2 1 3 2 3 7 3 3 3 3 2 3 3 2 4 5 2 4 2 2 11 18 6 2 2 2 4 2 1 4 4 2 3 3 2 6 3 2 3 2 2 1 6 2 2 4 2 7 7 9 3 2 5 3 4 2 6 3 3 3 2 2 2 5 2 2 3 2 4 4 5 2 3 2 2 2 6 2 2 3 5 2 2 3 2 4 2 2 3 6 12 17 17 17 296 363 363 363 51 51 51 228 321 412 287 111 111 111 202 202 202 196 309 479 463 463 463 463 29 382 313 186 162 232 68 68 267 267 267 267 267 339 339 250 250 250 276 174 174 174 319 388 388 303 117 229 229 247 126 126 326 326 101 101 149 149 228 321 289 321 159 159 159 159 325 34 111 111 111 438 438 143 458 445 351 242 116 199 199 255 255 399 217 217 473 65 486 486 486 460 460 240 310 310 107 395 242 242 203 250 250 181 181 181 181 99 338 338 400 400 152 378 378 345 389 389 314 26 26 251 241 241 367 367 367 367 35 458 96 26 386 266 266 266 266 266 266 146 358 143 458 192 419 439 78 170 170 491 28 491 28 28 491 491 2 491 2 102 2 102 102 102 491 491 435 305 289 289 321 209 287 111 111 111 438 438 325 34 180 84 350 413 348 131 34 463 463 463 463 402 29 29 495 467 467 154 154 458 96 96 232 68 105 336 470 470 151 151 178 35 401 75 272 87 87 8 354 420 420 420 464 464 44 255 8 259 259 190 380 380 499 499 428 85 146 146 35 196 196 473 46 46 46 46 438 186 162 68 68 68 115 273 279 279 279 279 279 279 279 375 375 352 352 352 427 491 247 491 312 126 292 23 23 23 101 101 149 149 228 321 321 287 287 111 111 111 438 203 53 394 478 232 172 115 344 344 344 344 274 58 72 72 437 350 350 350 350 350 203 53 250 250 359 359 166 166 324 324 301 10 479 331 231 231 231 274 8 354 29 469 325 41 324 301 378 345 345 389 139 497 175 335 14 14 209 145 463 463 463 463 280 29 382 245 245 43 364 174 174 174 330 348 76 465 75 377 87 87 399 217 473 65 65 264 264 264 468 468 337 337 337 324 324 301 217 473 429 429 429 429 429 246 246 246 19 454 229 321 247 126 126 126 326 326 326 326 326 23 408 408 408 149 149 391 491 289 321 412 
287 287 319 319 348 175 81 431 443 443 31 342 342 177 177 177 177 457 70 70 65 65 428 428 146 215 35 259 420 420 420 464 464 44 44 349 234 234 261 25 148 148 148 372 372 467 467 242 348 250 217 473 473 278 278 99 99 436 436 60 60 116 94 199 470 264 264 468 468 468 337 41 41 19 454 454 417 417 417 417 237 237 80 80 321 412 287 287 287 111 111 438 438 31 342 224 494 494 129 74 74 437 496 496 496 274 368 368 9 168 494 44 349 234 234 205 261 148 148 148 372 372 372 467 467 446 116 250 250 473 473 278 278 99 99 436 60 60 242 116 94 199 264 264 468 468 337 337 41 324 301 399 70 473 65 428 428 146 146 457 35 401 196 242 33 33 394 32 32 259 420 420 420 420 420 324 301 173 280 104 104 104 104 337 337 337 337 301 129 321 74 492 492 236 384 371 278 278 278 143 321 192 485 134 134 134 175 81 300 334 59 452 263 229 229 491 312 15 15 15 15 15 193 193 193 193 17 +103-1241-0032 103 700 1 9 5 33 8 11 36 18 27 29 12 5 33 31 5 24 11 15 8 32 4 23 1 18 4 37 5 38 8 33 11 30 13 31 1 12 4 33 1 19 40 24 8 18 8 5 31 33 8 11 20 23 5 37 14 34 23 20 9 23 19 31 1 8 21 19 31 33 23 5 37 29 30 19 33 20 22 23 27 12 40 1 4 25 11 8 37 25 13 37 14 18 4 11 5 29 30 19 33 20 11 30 13 31 19 25 24 8 23 8 16 12 5 33 8 22 5 25 30 19 24 13 24 9 14 1 9 5 33 5 37 22 6 30 31 1 19 33 31 1 6 23 12 5 24 6 30 33 5 23 35 22 16 6 30 38 14 11 33 36 19 40 5 25 33 19 33 1 4 25 11 12 13 25 1 14 3 1 3 6 3 6 5 2 3 2 2 3 6 3 4 2 5 5 5 2 3 2 6 2 2 3 6 6 3 5 2 5 13 14 3 7 3 1 4 4 3 4 13 6 3 3 3 5 4 4 3 2 2 6 4 2 2 3 2 5 9 23 7 3 3 2 3 2 3 2 6 2 1 2 2 6 3 6 3 9 14 5 1 3 5 3 2 2 2 2 5 2 2 2 5 1 2 2 3 3 3 2 5 1 2 3 4 6 5 4 2 1 2 3 4 2 4 2 2 3 2 4 3 8 24 2 1 2 1 3 4 3 4 9 14 5 2 3 1 6 4 2 2 5 2 2 2 3 3 2 5 5 2 3 2 2 2 4 3 3 3 2 1 3 3 6 17 5 2 2 2 3 6 5 17 17 17 296 363 363 51 51 51 184 184 184 289 321 320 354 159 159 240 285 34 111 111 111 111 438 236 35 75 371 371 374 374 132 132 88 58 72 72 496 496 496 496 215 35 96 26 34 45 45 45 31 478 478 68 68 68 115 273 231 231 231 203 53 64 212 384 93 93 93 93 464 464 111 111 111 111 438 99 99 338 395 389 389 389 497 129 401 259 74 483 58 72 110 202 202 202 202 173 402 44 44 44 43 43 364 276 276 346 346 428 428 146 146 358 457 401 401 321 75 161 161 79 487 288 443 443 120 271 271 150 39 433 433 433 160 160 160 112 439 56 56 47 47 491 47 491 47 491 491 316 491 73 73 289 289 321 320 127 114 92 92 92 92 92 240 385 35 131 335 226 188 188 356 356 281 342 9 196 70 70 46 46 46 46 438 58 58 72 72 72 72 72 72 72 72 437 265 428 428 146 146 146 464 459 459 459 31 39 86 86 6 272 106 486 428 85 146 438 239 36 371 485 485 286 139 139 175 175 69 462 462 130 29 498 498 498 498 169 164 164 26 26 359 81 324 324 301 8 259 354 425 386 81 459 271 271 271 39 39 433 390 390 160 112 427 491 247 312 126 126 292 292 292 326 326 326 326 326 326 326 326 326 101 101 149 149 149 228 321 321 287 287 111 111 438 438 143 36 107 395 494 31 342 342 26 26 26 241 266 266 266 266 173 402 401 401 401 259 74 190 487 278 278 325 34 324 324 143 458 321 208 208 208 386 386 431 496 496 496 496 274 274 216 164 270 270 433 160 18 427 56 56 47 47 491 47 47 491 2 2 491 316 316 321 73 289 321 435 209 83 55 55 322 322 212 34 111 111 111 111 438 202 202 402 196 479 331 463 463 463 29 29 382 58 58 72 110 110 254 240 325 34 44 44 129 129 259 74 190 487 278 278 325 324 324 324 236 239 259 161 79 487 288 443 443 169 342 342 224 340 340 340 250 217 70 46 46 46 46 46 438 251 251 241 431 431 428 428 428 146 146 186 402 352 342 224 45 45 325 325 111 111 111 178 458 192 242 242 116 33 250 456 456 456 456 456 399 217 473 65 432 432 330 203 53 212 212 29 334 334 59 59 452 
263 229 321 321 312 312 126 292 292 1 292 292 1 1 1 1 1 23 260 260 408 408 391 391 391 289 289 321 320 159 159 159 285 255 255 402 402 458 144 441 441 441 153 153 372 372 396 186 186 54 54 86 112 427 56 56 201 201 201 201 201 201 201 201 201 201 201 321 435 435 435 320 209 177 177 177 356 356 342 483 14 226 321 411 297 297 297 297 297 297 293 122 216 22 283 455 399 399 138 138 138 138 372 396 313 449 377 87 87 87 251 241 367 367 367 367 458 96 393 393 234 234 261 25 148 148 148 372 245 43 345 109 109 313 236 36 75 108 377 485 489 378 88 356 356 356 281 342 430 242 242 116 212 131 277 277 277 277 277 385 233 75 419 427 56 56 170 170 312 312 292 292 292 1 1 1 408 408 408 408 305 321 209 83 55 55 322 67 466 127 361 361 361 361 361 388 195 117 229 229 247 126 126 193 193 17 +103-1241-0033 103 787 1 8 22 5 25 19 24 4 21 5 25 12 4 33 8 24 11 30 13 31 33 17 6 30 21 5 31 23 20 1 12 19 31 24 6 30 25 19 26 38 13 25 8 23 13 16 33 12 20 5 31 8 23 5 24 8 16 13 23 33 31 27 5 32 15 24 11 9 19 22 5 40 8 18 4 11 33 5 38 13 30 12 19 31 18 6 30 5 11 27 23 11 1 38 19 25 31 20 11 30 13 31 1 6 23 12 20 6 30 16 5 25 40 18 4 11 33 5 38 13 30 12 5 24 39 36 25 27 1 5 24 14 10 5 25 33 19 25 2 23 4 31 38 19 25 33 14 11 27 25 15 33 19 11 34 30 20 18 5 25 11 14 11 39 3 30 11 40 5 37 38 19 25 31 20 33 19 12 20 5 31 8 23 5 24 1 31 5 24 29 20 29 5 23 31 13 11 19 33 38 5 40 9 19 22 5 40 18 20 22 35 11 5 25 31 13 23 19 33 1 7 4 3 2 2 2 4 4 5 2 2 1 2 3 3 2 3 2 2 6 2 4 4 3 3 2 7 2 8 21 3 2 5 4 2 2 2 3 3 2 2 2 4 3 3 3 1 2 2 4 6 6 4 2 3 4 5 2 3 2 6 4 2 9 6 3 3 2 2 3 3 3 3 4 2 2 3 3 2 2 2 3 2 4 8 4 4 1 3 4 4 2 1 3 2 4 5 2 4 3 4 10 27 7 3 3 3 3 3 4 2 2 2 3 2 2 2 2 2 3 2 3 2 2 2 3 3 10 15 5 5 4 4 2 1 2 2 2 18 5 4 6 3 2 1 4 2 4 4 2 4 3 2 2 5 3 6 4 3 2 2 3 2 4 7 3 4 1 2 4 2 2 4 4 1 3 2 2 4 2 5 5 4 2 6 21 6 4 2 2 3 3 2 2 4 2 2 1 3 1 2 3 2 2 4 2 2 2 2 4 2 2 2 2 5 3 4 3 5 9 17 17 363 51 51 228 321 209 111 111 111 438 458 192 192 242 116 199 255 255 399 217 473 65 486 486 460 240 240 35 310 107 242 298 116 379 466 45 45 45 285 34 111 111 365 203 203 394 212 161 79 487 288 443 169 150 39 86 86 238 6 336 90 221 321 144 208 153 153 153 387 372 396 313 24 310 107 459 459 271 39 433 68 68 68 359 474 474 474 474 19 19 454 454 417 442 442 170 170 28 28 491 2 491 491 2 491 2 491 2 2 491 316 316 73 289 321 321 7 127 258 258 31 162 342 142 142 196 217 70 65 153 387 387 396 348 94 176 176 328 200 200 248 345 409 409 409 94 199 111 111 111 438 251 241 431 443 443 169 169 352 402 198 198 448 448 464 464 255 38 38 162 232 68 68 115 273 265 265 265 85 85 146 175 175 81 81 242 203 203 53 65 111 111 111 438 349 205 205 261 25 189 139 139 293 122 478 478 66 68 172 115 344 344 344 344 88 88 255 255 186 99 338 338 338 338 338 395 470 290 290 290 290 290 434 339 53 394 212 401 221 321 354 420 420 143 458 192 278 253 368 453 342 168 111 111 111 438 72 110 110 254 254 240 35 321 377 87 87 87 43 364 109 109 264 264 313 216 216 114 258 258 31 31 342 142 142 72 72 72 72 72 72 437 153 481 306 372 406 467 467 469 240 285 34 106 106 424 424 424 424 497 122 122 133 401 321 364 276 109 278 330 348 33 394 77 77 342 224 41 324 324 301 236 321 75 161 79 79 288 288 443 120 271 271 39 39 433 390 160 160 112 427 491 247 312 126 292 292 292 292 326 326 326 326 326 23 23 23 101 101 101 101 101 149 149 228 289 289 321 289 209 209 287 297 297 297 297 297 293 293 216 22 448 448 448 378 106 153 372 372 372 349 349 205 261 25 242 379 379 471 77 342 110 110 110 460 240 314 35 384 87 87 43 43 276 109 109 468 468 240 216 216 57 57 203 217 473 219 219 152 374 116 94 331 84 84 84 84 
16 274 98 98 13 13 414 491 170 491 170 187 491 187 187 23 23 101 101 149 149 149 321 209 44 44 44 399 217 70 473 65 498 498 396 313 35 310 107 107 242 116 116 199 34 89 446 116 33 58 58 72 437 496 496 496 496 215 35 35 96 270 342 224 242 242 116 33 466 466 241 431 376 376 376 169 150 150 86 238 272 397 397 109 109 278 278 64 76 449 300 382 313 236 239 259 384 371 84 496 496 413 94 199 158 158 158 252 325 449 191 191 191 314 36 164 119 161 161 487 487 487 337 213 213 324 324 3 3 58 72 72 437 319 319 319 348 64 212 300 382 313 313 314 314 219 219 219 180 180 106 306 306 306 306 306 396 396 37 37 24 77 270 168 168 462 462 402 402 402 345 109 109 330 116 33 394 77 77 224 41 41 324 236 108 377 123 123 216 283 448 448 464 464 255 38 162 68 115 115 106 265 265 265 85 146 299 175 175 81 275 203 203 381 117 48 13 491 491 312 312 126 292 292 292 292 292 21 21 21 23 23 23 260 408 391 391 391 321 321 373 66 68 115 273 231 231 231 319 53 76 76 74 485 213 213 301 8 354 100 497 497 186 162 68 115 273 470 443 240 325 177 177 177 457 345 141 141 281 9 221 336 354 420 420 143 259 144 27 351 368 368 342 224 30 30 422 143 144 27 389 389 389 314 196 242 242 33 394 478 232 68 172 115 273 443 443 139 175 175 175 81 277 277 37 385 131 404 321 247 247 126 15 15 193 193 193 17 +103-1241-0034 103 845 1 9 5 33 8 11 30 4 12 14 9 19 23 20 37 12 5 33 19 33 38 5 40 7 33 5 37 12 5 22 8 25 11 25 5 31 5 37 18 19 40 18 3 30 33 38 35 11 5 25 33 39 36 1 38 13 25 38 20 17 3 33 3 25 12 5 33 30 15 25 1 8 16 13 23 33 13 40 19 16 1 13 37 30 20 9 3 11 20 24 5 31 33 9 20 23 35 22 19 26 4 33 24 20 5 25 11 29 19 33 20 19 26 24 20 1 9 5 33 8 21 5 31 33 38 13 25 33 5 38 14 22 5 25 11 19 24 4 21 5 25 11 12 5 33 8 18 4 11 3 25 12 5 24 27 31 33 1 9 39 36 33 5 16 5 23 29 15 23 9 23 36 31 19 23 22 11 30 13 31 1 9 19 22 5 40 38 19 25 39 36 3 30 19 24 4 21 5 25 19 26 39 36 24 8 33 13 40 38 13 23 19 24 4 21 5 25 31 5 24 34 19 26 38 14 34 18 38 8 23 1 9 3 1 2 6 4 2 3 3 2 3 2 5 5 2 2 1 2 1 2 2 1 4 4 2 2 2 1 2 7 4 1 2 1 2 4 2 2 1 2 3 5 4 3 4 2 1 2 3 2 2 4 7 29 4 2 1 2 3 4 3 3 2 3 1 3 6 2 7 5 10 5 4 3 2 3 2 4 2 5 1 6 3 3 2 3 3 2 3 3 3 2 2 3 3 4 3 4 2 4 2 3 2 4 1 2 1 6 2 4 5 2 5 2 9 23 3 2 2 8 4 3 2 2 2 3 2 3 3 4 3 3 1 2 1 3 3 5 5 2 2 1 2 1 3 5 9 2 3 3 3 2 1 3 4 3 4 10 2 8 6 2 4 4 2 4 12 10 5 8 5 9 10 3 3 7 2 3 4 9 23 2 4 2 3 3 1 3 2 3 5 7 4 3 4 4 5 2 1 3 3 3 2 3 4 3 2 3 3 2 3 2 4 3 5 2 2 3 2 2 3 2 4 3 4 2 3 3 8 4 13 17 17 296 51 51 184 184 184 289 321 320 159 159 240 199 111 111 111 111 111 438 314 133 133 147 380 180 486 443 240 240 216 300 300 382 245 8 354 255 255 251 251 251 81 444 444 444 444 246 252 173 198 164 45 45 45 34 177 177 177 345 141 141 281 453 342 168 180 113 113 113 285 285 69 223 130 198 22 283 455 455 129 259 144 27 437 480 480 480 146 299 339 64 10 459 459 459 271 342 342 224 69 223 130 280 257 257 257 31 9 142 142 72 72 437 306 306 306 306 306 396 396 385 233 131 133 133 430 430 430 430 430 430 430 430 430 212 131 219 219 219 477 477 477 374 132 132 98 48 13 170 170 170 491 312 312 28 341 341 341 341 341 12 12 12 21 21 21 21 23 23 101 101 149 391 391 73 289 289 321 7 70 409 409 409 399 53 473 429 30 422 143 458 144 180 189 405 405 206 285 34 125 125 125 348 466 22 283 455 236 36 161 161 161 161 487 487 288 290 290 290 290 434 434 434 339 195 404 229 82 247 126 126 326 23 101 101 149 149 321 321 287 111 111 111 349 205 261 25 189 189 139 293 122 35 449 34 253 253 453 453 342 118 118 118 118 402 402 14 226 321 209 411 145 204 204 204 204 204 204 29 337 337 337 301 8 259 354 109 151 240 325 34 41 324 301 399 217 70 65 151 169 150 
342 105 221 259 354 420 420 301 301 251 251 241 367 367 367 367 367 458 192 192 176 135 200 200 464 415 415 415 415 457 457 217 429 429 429 464 464 89 203 394 129 401 321 75 74 351 278 278 278 325 449 41 41 324 324 324 464 434 135 328 328 200 248 248 248 429 429 429 429 429 19 19 454 417 417 170 170 170 170 28 491 28 2 491 491 2 491 2 2 491 2 316 491 491 316 73 289 289 321 321 354 159 159 159 285 34 111 111 111 111 111 111 438 438 239 384 371 180 151 151 31 54 54 142 397 397 109 109 189 330 457 394 465 108 377 87 87 43 364 276 109 372 498 396 396 178 458 192 89 340 94 199 255 255 399 217 473 65 486 486 486 460 240 240 36 310 107 242 275 275 116 195 466 45 45 45 45 325 34 111 111 111 111 438 438 58 72 72 72 110 110 110 110 254 254 240 285 34 106 125 125 125 125 466 22 283 455 399 70 65 496 496 496 186 186 238 6 6 472 221 401 401 47 47 491 491 47 80 491 80 401 321 354 354 485 485 219 219 219 485 485 374 374 374 132 132 132 285 449 469 469 469 349 349 155 262 262 100 100 100 497 497 122 129 401 401 401 401 321 75 74 74 437 351 351 290 290 171 171 171 171 171 139 139 139 139 497 497 497 497 122 32 32 32 401 401 321 354 425 425 425 241 431 431 374 374 374 374 132 132 132 132 186 162 232 232 232 68 68 172 115 273 278 278 139 139 293 122 458 458 96 472 221 401 401 75 161 79 487 288 443 443 169 271 150 39 433 433 433 433 160 427 247 247 247 126 126 126 326 326 326 326 326 326 326 326 326 326 101 101 101 149 228 228 321 321 321 354 420 420 422 143 458 192 485 278 368 453 9 397 345 409 409 409 67 219 219 152 152 152 152 14 14 411 411 284 284 284 353 353 353 396 406 467 467 255 255 255 399 217 473 65 486 486 365 460 240 310 107 107 447 242 94 199 176 176 328 200 248 248 219 152 152 152 378 399 70 65 428 428 428 146 143 449 34 253 253 9 142 397 336 109 109 139 139 175 175 81 255 255 217 217 65 486 486 460 240 240 36 310 107 242 242 116 394 478 66 342 224 231 231 231 76 76 198 214 214 214 328 200 248 250 364 276 109 498 498 396 169 164 164 133 364 276 276 346 346 346 265 85 85 85 355 355 375 375 375 98 229 229 321 247 15 15 15 15 15 193 193 193 193 17 +103-1241-0035 103 777 1 4 25 11 5 1 9 19 17 18 4 33 1 6 23 16 23 7 14 40 5 25 11 25 3 11 19 26 29 23 36 24 40 1 5 25 11 5 17 27 23 11 38 3 10 5 25 11 1 22 19 11 17 23 5 37 40 5 25 11 9 36 33 31 1 8 16 13 23 33 10 19 30 11 5 29 1 30 8 33 5 38 15 1 4 25 11 8 19 25 21 28 11 24 8 33 30 19 29 33 19 12 20 8 23 5 25 11 38 19 34 1 6 23 24 8 24 8 33 1 8 38 5 40 5 25 33 5 9 19 33 31 19 22 5 24 19 26 27 37 14 19 25 12 5 9 27 33 1 25 8 12 14 38 5 40 24 19 31 19 40 31 29 13 25 31 14 1 6 23 12 27 32 20 21 13 25 14 5 23 20 19 40 1 14 4 2 2 6 4 3 5 4 9 5 5 1 9 5 9 2 9 3 5 2 2 2 3 6 1 2 5 7 3 6 4 6 7 5 1 2 2 6 4 6 4 4 6 6 2 1 4 3 5 3 4 3 4 4 3 3 2 2 3 6 5 5 11 8 10 5 2 3 3 6 2 2 3 4 3 2 3 4 3 2 4 8 11 6 1 2 5 2 2 4 5 1 3 4 4 2 2 2 3 2 3 4 6 2 2 1 3 2 1 5 1 7 4 3 6 4 8 9 44 7 2 4 3 1 2 1 2 7 3 4 6 3 8 2 2 2 4 4 4 2 2 2 1 3 3 9 5 15 3 5 2 2 3 1 4 1 2 3 2 2 3 3 1 4 5 6 11 4 2 2 3 4 2 5 2 2 4 2 3 6 5 13 8 17 17 17 296 363 363 363 363 51 51 51 51 228 321 321 83 55 55 322 67 131 44 44 44 236 32 401 401 401 401 401 401 321 354 278 278 278 278 278 360 252 416 458 192 445 183 72 72 72 72 110 110 486 486 486 460 460 240 35 131 483 226 226 226 321 411 287 297 297 297 297 297 297 297 293 293 293 122 349 349 234 234 234 234 261 425 425 386 386 431 486 315 315 315 450 88 372 372 304 304 304 368 269 342 342 89 89 446 446 33 10 10 309 479 331 331 284 405 405 206 240 325 176 176 328 200 200 248 248 76 259 74 74 425 425 425 386 386 431 374 374 374 374 374 434 203 381 471 471 49 433 433 97 
427 247 247 126 326 326 326 101 149 228 321 83 55 55 322 67 34 44 44 236 129 259 144 27 424 424 424 424 424 424 424 424 424 497 122 122 131 133 133 364 276 346 346 346 405 405 206 206 169 35 36 107 107 395 89 89 446 33 394 90 90 401 401 401 321 75 445 445 445 351 278 278 240 314 90 401 401 321 144 208 425 386 431 431 431 266 266 173 173 402 270 270 342 224 89 89 446 33 33 394 32 32 401 401 401 354 354 374 374 374 374 132 233 385 233 270 270 270 390 390 390 18 112 56 56 56 47 47 491 47 47 47 491 491 435 435 321 435 435 435 287 287 111 111 111 438 349 205 205 261 25 189 139 139 293 167 457 401 321 75 310 107 107 395 286 286 468 468 313 313 285 34 230 230 230 230 215 35 402 133 147 147 147 499 499 428 428 146 146 325 34 255 255 43 364 109 109 403 403 403 207 19 454 229 321 247 126 126 326 326 326 326 101 101 149 149 228 412 83 55 55 55 322 212 34 111 111 111 111 438 121 121 339 394 212 107 180 106 153 387 387 146 252 314 196 217 46 46 46 438 438 129 36 161 161 487 288 278 173 402 96 36 272 377 123 123 216 22 448 448 448 464 464 145 265 265 85 146 146 175 175 81 242 116 212 133 133 333 333 220 220 335 14 14 226 226 321 209 297 297 297 297 297 297 297 293 399 399 70 46 46 46 46 46 438 438 399 217 70 65 265 265 428 428 428 146 358 358 233 36 227 419 419 439 417 417 170 170 170 491 28 28 491 28 442 28 442 362 491 362 102 491 362 491 362 362 102 362 362 491 362 491 362 218 218 491 218 102 369 369 369 369 369 21 21 21 101 101 149 149 228 289 412 287 111 111 111 378 345 141 141 141 141 281 453 342 242 242 116 212 131 44 44 44 32 32 32 321 354 354 278 278 278 385 457 478 478 232 68 68 172 115 470 470 278 120 385 143 458 458 144 27 351 319 319 319 53 176 176 135 200 200 464 106 410 410 410 410 173 280 29 29 495 467 340 340 116 466 22 283 455 8 354 354 180 496 496 496 496 496 274 274 37 24 227 419 427 78 78 170 491 187 187 292 23 23 23 23 101 149 149 228 289 321 320 479 331 265 265 428 146 240 216 300 300 378 43 345 141 141 281 9 221 196 473 258 258 258 342 224 494 494 31 232 232 105 105 336 470 432 432 330 379 64 77 342 224 224 334 334 334 59 452 452 263 321 247 126 126 23 23 101 101 101 149 149 321 321 287 297 297 293 216 216 114 84 84 186 186 338 400 400 400 422 239 310 107 395 395 432 330 94 199 495 495 495 467 134 134 359 359 166 166 166 324 324 464 464 356 356 120 120 271 185 433 433 433 433 160 160 112 417 417 417 237 421 421 491 421 128 128 128 193 17 +103-1241-0036 103 778 1 32 20 31 13 11 32 20 18 4 11 5 25 33 8 24 33 19 17 19 33 31 19 22 1 38 3 10 19 26 33 19 31 20 12 5 33 8 11 19 11 5 25 16 3 23 27 37 14 9 6 30 11 1 32 20 31 13 11 32 20 25 13 37 14 31 6 12 5 1 9 20 33 5 37 24 20 16 14 29 30 7 23 19 26 5 9 7 33 1 9 5 33 19 16 19 33 22 4 29 33 18 14 16 14 24 9 20 19 26 31 20 31 19 22 19 33 31 5 24 14 31 20 8 11 19 11 29 30 7 23 19 40 5 25 33 19 33 1 4 25 11 8 38 6 25 33 19 11 33 19 31 20 13 37 30 20 34 19 26 12 5 33 38 5 40 33 19 9 20 31 20 25 3 25 12 4 33 9 27 33 1 9 19 22 5 40 8 11 19 11 5 25 27 38 13 12 14 8 11 13 37 14 18 4 37 5 25 5 12 14 3 29 14 33 36 25 5 33 20 1 10 6 3 4 3 3 2 3 4 2 3 1 3 5 4 3 2 1 4 3 3 6 5 4 2 4 3 3 2 3 2 1 6 5 1 2 2 4 3 1 2 1 3 5 3 5 4 3 3 3 4 3 5 18 6 3 4 3 3 4 2 6 2 3 4 4 5 3 3 2 2 4 2 2 3 2 3 3 2 5 3 3 3 3 3 1 3 8 6 26 2 2 2 2 4 2 2 4 4 4 1 2 1 4 2 4 2 5 1 6 7 4 6 4 3 2 2 2 2 6 4 5 4 3 4 3 2 4 3 4 4 2 3 2 2 2 5 6 19 4 2 1 3 4 1 2 2 1 2 1 2 6 4 3 3 2 2 2 1 3 1 2 1 2 1 3 3 1 3 3 8 5 2 3 3 2 4 2 4 6 3 14 2 4 1 3 3 4 3 3 1 4 6 5 3 1 4 3 5 3 2 3 3 4 2 2 2 3 3 3 5 2 3 2 6 4 2 3 2 8 17 17 17 17 296 296 363 363 51 51 51 321 321 373 338 400 400 400 422 422 162 68 115 470 
470 443 240 314 35 310 107 400 400 30 422 58 110 110 254 254 254 240 35 242 242 242 33 457 465 108 119 437 103 103 103 146 299 203 53 64 212 377 87 416 416 445 180 278 443 385 385 77 478 66 232 68 172 115 273 470 278 278 120 178 458 458 192 225 225 225 7 276 346 346 405 206 206 35 310 107 135 135 135 248 212 384 87 87 38 342 68 172 267 267 267 267 301 301 216 45 45 45 325 111 111 111 438 438 239 384 371 278 278 116 242 33 90 393 393 234 261 25 106 481 481 481 293 293 175 14 14 410 410 410 410 410 280 29 29 245 245 8 354 153 153 153 153 372 372 372 37 24 404 439 229 491 247 312 312 187 292 292 292 292 292 21 21 21 21 23 101 408 408 149 321 321 373 400 400 400 30 422 162 162 68 115 470 470 120 240 240 314 310 338 338 400 400 400 30 301 10 10 309 479 331 463 463 463 463 29 382 313 186 186 162 54 115 273 106 481 405 481 293 216 216 283 283 455 236 401 401 321 354 213 213 213 252 325 34 69 223 130 402 196 429 429 429 429 422 393 155 332 332 245 129 321 74 190 190 380 499 499 486 481 481 293 175 175 81 176 328 200 200 255 255 8 8 180 113 113 113 113 113 450 450 167 385 75 227 419 439 78 78 170 491 28 491 491 341 341 12 12 12 21 21 21 21 21 21 101 101 149 391 228 491 289 321 321 320 159 159 159 285 34 118 118 118 118 261 177 177 177 131 90 259 144 445 351 351 443 443 240 215 35 96 96 272 156 156 382 349 205 155 165 165 165 165 53 394 212 212 354 420 420 360 360 360 135 135 200 200 248 248 478 66 68 68 68 115 267 267 267 213 422 186 162 68 68 68 115 273 470 278 278 178 143 458 192 177 177 77 77 342 168 44 44 399 217 217 70 65 498 498 498 396 186 162 54 172 224 41 41 324 464 464 111 111 438 438 239 75 371 371 278 278 314 35 401 259 74 190 190 499 499 499 405 450 293 293 175 175 81 356 356 281 453 430 430 430 430 64 465 34 277 277 277 277 385 385 233 321 419 427 491 491 312 312 312 187 187 12 12 12 12 12 21 21 326 408 408 149 149 321 321 209 55 55 322 322 199 111 111 111 378 43 364 174 174 319 348 325 34 191 191 36 87 87 87 162 68 68 172 267 267 267 267 464 464 464 204 204 204 204 29 29 337 469 164 164 214 214 214 200 248 114 45 177 43 345 141 141 281 86 238 6 377 87 87 8 354 420 420 420 422 162 162 68 68 68 68 267 267 267 267 267 434 339 339 199 125 125 125 125 348 466 114 114 92 92 92 167 457 401 321 354 354 496 496 496 496 274 37 24 131 427 321 247 126 326 326 326 326 326 326 326 101 149 228 289 321 321 354 420 420 422 143 144 27 180 253 368 453 168 111 111 111 438 236 325 371 278 278 278 314 242 242 242 457 10 10 10 309 331 331 84 84 84 274 43 43 109 109 181 216 216 300 300 300 406 467 111 111 111 111 438 240 325 34 463 463 463 280 29 382 382 58 72 72 110 202 202 202 202 402 44 44 116 479 331 493 493 493 493 216 300 495 406 467 467 499 405 206 215 29 29 469 469 236 36 108 119 485 485 485 374 374 330 94 199 469 469 469 325 41 41 41 19 19 19 454 454 229 170 491 491 15 15 15 15 15 15 15 15 193 193 193 193 193 17 +103-1241-0037 103 638 1 27 1 12 13 30 14 5 23 3 33 24 6 30 1 10 13 30 20 33 30 20 40 6 23 19 25 9 23 36 24 1 12 19 31 1 8 23 5 25 11 19 40 12 5 1 2 29 23 15 31 8 21 19 31 33 23 5 37 19 33 6 23 30 13 11 20 4 25 11 8 24 31 27 17 23 4 11 8 24 17 27 19 26 33 5 23 19 37 18 20 30 1 8 37 6 23 38 20 40 18 14 11 12 5 33 29 30 19 25 31 13 11 38 14 11 8 23 5 25 11 38 5 40 12 5 29 30 19 33 20 5 31 33 29 23 15 31 19 25 12 5 38 14 23 11 1 18 43 5 3 4 3 5 4 7 8 3 5 3 4 2 10 2 3 4 5 2 3 6 6 4 3 4 4 7 10 6 26 3 5 5 1 12 4 3 1 3 1 5 1 5 2 28 5 3 6 8 4 4 2 2 4 3 2 3 2 2 4 5 4 3 3 5 1 2 2 3 3 8 4 7 4 4 2 3 4 4 2 3 1 3 2 4 2 4 2 4 9 19 5 4 2 2 3 2 4 4 2 3 2 2 3 3 2 1 4 3 3 3 2 2 5 6 2 3 2 2 1 2 3 1 2 6 2 2 1 4 
3 3 1 3 3 4 5 1 2 1 3 5 3 5 4 6 17 17 17 296 363 363 363 51 51 51 51 491 491 184 491 184 289 321 321 287 287 287 284 284 284 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 98 98 98 225 98 98 225 225 225 373 164 164 289 321 164 127 127 114 0 0 264 468 468 406 406 467 467 406 467 467 467 44 44 251 251 251 251 251 241 241 431 431 284 284 405 405 206 206 167 457 457 196 70 70 70 138 138 138 138 372 313 313 236 401 401 401 401 75 310 107 107 107 395 395 395 395 264 468 468 406 337 337 324 324 422 143 36 161 161 487 487 41 41 324 318 49 9 9 483 14 321 411 297 297 297 297 297 297 297 175 175 81 242 340 203 53 53 394 76 401 321 354 425 425 425 241 431 431 374 374 374 374 374 374 132 132 132 413 203 381 381 381 404 229 229 247 312 312 126 292 292 292 292 292 292 292 23 23 23 23 23 23 23 101 408 391 491 491 491 289 289 321 127 114 258 258 258 258 31 342 342 342 483 14 14 321 411 287 284 265 265 85 85 85 146 146 139 175 175 175 81 242 275 116 64 212 131 356 356 356 281 342 198 198 22 283 455 455 129 129 401 321 354 425 425 386 431 374 374 374 374 132 399 53 473 324 324 324 464 459 459 459 31 39 86 86 6 272 472 336 321 74 74 425 425 386 386 343 343 343 343 358 358 358 39 39 433 68 172 115 97 225 287 111 111 438 143 36 107 395 494 31 31 342 26 26 26 251 241 241 266 266 266 173 173 29 277 277 325 285 106 106 297 297 297 293 293 42 42 147 147 380 288 443 443 240 325 325 41 41 324 324 3 464 89 322 94 199 111 111 111 356 203 53 478 478 232 232 68 68 115 344 344 344 344 344 274 274 32 401 401 321 208 208 386 386 431 431 376 376 376 240 285 34 111 111 203 203 53 394 90 465 144 180 84 496 88 176 135 135 248 36 377 87 87 87 251 251 241 431 278 278 173 173 402 183 183 286 286 286 286 264 59 59 452 263 263 417 414 170 170 491 170 28 491 28 28 491 2 491 2 2 491 491 435 435 289 289 321 320 305 287 287 111 111 202 202 280 106 297 297 297 297 88 109 109 469 368 31 342 142 142 72 110 498 498 498 498 313 240 314 198 45 45 45 457 129 259 259 74 190 487 432 330 330 64 64 77 342 342 168 145 329 329 240 131 133 345 345 109 313 285 335 14 14 145 284 265 85 146 146 139 175 81 81 275 275 116 133 133 345 141 141 281 453 342 198 22 283 455 129 129 321 74 190 190 487 487 278 278 325 324 324 464 459 459 459 31 86 86 221 336 321 74 425 386 343 343 343 343 252 186 39 342 342 340 340 340 340 466 22 283 455 43 43 276 109 109 498 498 498 139 139 375 375 375 122 122 131 427 229 247 15 15 193 193 17 +103-1241-0038 103 746 1 5 25 11 8 39 36 40 11 33 5 19 24 4 21 5 25 8 38 5 40 23 19 37 19 26 18 20 30 1 9 5 33 8 25 13 37 14 30 20 23 20 19 22 31 29 13 22 33 5 11 8 38 35 11 1 19 33 31 11 19 23 8 33 16 5 23 38 19 25 39 35 30 19 24 4 21 5 25 15 32 5 25 40 22 5 24 33 30 36 19 40 5 25 33 19 33 1 9 5 33 12 27 40 30 13 11 30 27 11 40 14 31 27 16 5 25 20 1 18 38 13 25 38 20 17 3 33 19 25 33 5 12 5 33 30 15 25 4 33 32 3 30 23 5 33 7 25 5 25 11 12 5 30 13 11 30 27 11 40 9 20 17 4 25 33 5 16 23 4 32 29 4 31 33 1 8 4 31 33 24 19 31 19 40 31 29 13 25 31 14 38 5 33 24 15 11 12 5 24 30 13 11 1 9 3 2 2 3 5 2 4 1 2 2 2 4 4 4 2 2 3 3 1 5 3 2 3 2 2 6 2 6 20 3 2 2 4 5 2 4 3 6 2 4 3 2 3 4 3 2 2 3 2 2 6 6 4 7 26 5 3 3 3 2 6 4 3 4 1 3 2 3 2 2 2 3 3 3 4 4 3 2 3 6 1 2 3 4 2 4 5 2 4 2 3 2 3 2 5 5 22 2 1 3 3 4 5 4 3 5 4 3 3 2 3 6 5 7 4 3 9 20 1 2 1 2 1 4 3 3 2 2 4 2 3 2 3 6 3 3 3 2 2 5 2 3 2 4 5 7 2 2 1 2 1 4 3 3 6 3 4 2 3 2 2 3 4 3 2 2 4 3 5 6 5 8 4 3 12 8 8 3 2 2 3 3 2 2 3 3 2 3 4 2 3 2 2 3 4 2 1 2 5 1 4 4 5 17 17 17 363 363 363 51 51 228 321 83 55 55 322 131 111 111 111 111 438 219 219 219 485 485 374 186 162 54 238 6 161 87 87 464 
255 255 399 217 473 65 486 486 460 240 240 310 107 395 242 116 94 199 111 111 378 378 345 141 141 281 342 9 26 26 251 241 431 278 278 173 173 280 176 135 328 200 248 183 183 286 286 286 286 264 59 59 452 263 321 247 247 126 126 292 292 292 23 23 23 23 260 260 391 391 391 316 73 73 289 321 321 354 159 159 159 285 34 111 111 111 111 438 10 10 479 463 463 463 463 463 29 29 382 245 245 42 42 147 147 380 485 485 278 278 359 359 166 166 166 464 464 154 154 458 96 66 86 105 105 336 470 470 151 178 178 96 96 272 191 191 191 325 34 111 111 111 111 438 438 43 364 364 109 109 389 389 120 120 120 37 37 75 419 419 439 78 170 170 170 170 28 491 28 491 28 362 362 491 362 2 362 362 491 491 491 366 491 491 316 316 435 289 321 321 177 177 177 143 36 77 86 86 221 336 384 490 490 490 251 251 251 241 431 428 428 428 428 146 35 35 393 393 155 262 262 100 100 497 43 364 409 409 409 409 33 219 219 152 222 222 406 467 467 467 255 203 217 473 65 486 486 460 460 240 310 107 395 395 469 116 94 418 418 418 418 418 99 436 436 60 298 298 379 471 49 9 142 336 144 27 351 319 319 203 53 90 76 321 161 161 161 487 487 374 374 132 88 88 356 356 356 453 342 430 430 116 64 212 131 277 277 277 277 277 385 385 75 419 427 229 491 312 312 126 292 292 292 1 1 1 1 21 21 21 21 101 408 149 149 228 321 320 320 159 159 159 35 35 198 127 124 124 124 124 124 368 9 142 397 42 147 147 380 288 443 443 240 240 314 131 133 133 147 147 380 496 496 496 274 368 77 270 9 353 353 353 186 186 232 482 172 115 344 344 344 344 274 349 349 234 234 234 234 261 25 319 319 319 240 94 199 41 41 41 41 19 19 19 454 454 414 170 170 312 312 187 187 187 292 292 292 23 23 23 101 101 149 149 228 321 321 320 345 409 409 409 399 473 429 30 301 143 465 144 180 189 189 240 285 94 34 340 340 116 64 76 377 377 123 123 216 216 22 283 455 236 401 321 75 161 161 487 487 288 290 290 290 434 339 199 199 415 457 457 186 338 338 338 395 499 499 306 306 206 293 175 81 81 469 457 457 36 75 108 119 351 315 315 315 315 450 450 413 413 94 199 340 340 466 22 283 455 455 42 42 147 380 288 443 443 240 314 131 133 133 364 147 380 380 496 496 496 274 274 24 77 270 142 221 336 420 420 420 416 445 445 210 210 210 460 330 388 76 384 87 87 87 349 234 261 261 386 431 431 376 376 460 460 169 169 99 436 447 447 221 336 74 74 311 311 311 311 311 311 311 460 169 150 150 342 86 6 272 427 82 247 126 326 326 326 326 101 101 101 149 149 228 321 287 287 111 111 111 438 438 145 145 376 376 460 460 169 150 150 86 238 6 196 196 217 473 258 258 342 342 224 494 494 31 162 68 105 105 336 354 470 432 330 379 64 77 77 224 300 382 382 245 43 345 181 181 181 167 457 217 217 473 476 476 476 252 314 259 22 57 57 203 53 250 250 147 380 288 288 120 120 240 385 131 229 247 126 193 193 17 +103-1241-0039 103 806 1 4 25 11 32 20 31 13 11 32 20 11 19 11 25 33 25 27 5 25 11 16 14 29 19 33 20 40 31 15 22 25 3 33 5 4 31 22 18 14 13 25 20 24 6 30 22 38 13 31 10 5 25 40 1 32 20 31 13 11 8 24 5 31 33 18 4 37 4 31 33 18 14 5 34 7 40 5 25 11 6 23 30 13 11 20 1 8 31 5 29 27 40 8 18 4 11 33 36 9 5 33 1 18 7 39 36 17 27 19 26 33 19 16 8 25 11 7 33 5 9 7 33 34 19 26 40 19 16 39 36 11 27 25 33 4 31 22 38 13 31 10 5 25 40 1 4 25 11 38 5 33 11 5 40 24 15 22 12 5 30 27 11 40 30 13 11 1 38 13 23 25 7 1 8 11 5 25 27 1 31 13 11 24 4 34 39 36 1 6 3 2 3 4 3 6 2 3 3 2 3 2 3 2 1 3 4 2 2 1 2 3 4 2 3 3 2 5 4 3 2 4 3 2 7 3 2 2 1 2 3 2 4 2 3 4 2 2 3 5 2 3 4 15 6 2 4 2 1 5 3 3 2 1 2 1 2 6 3 1 2 1 3 6 6 4 1 2 2 4 4 3 3 2 8 16 7 4 1 4 4 5 6 5 5 4 5 7 4 2 3 17 7 6 5 1 3 2 3 1 2 1 5 5 2 3 4 2 2 2 6 3 2 2 3 3 2 2 2 2 3 3 2 2 4 4 3 2 2 4 4 3 4 10 22 3 2 
1 2 4 5 4 5 6 2 3 5 2 3 4 4 3 4 3 4 7 37 18 6 3 4 20 22 11 2 3 3 17 8 7 1 3 3 5 3 3 10 19 17 17 296 296 51 321 412 55 55 322 322 67 478 338 338 400 400 400 30 30 422 186 232 232 172 115 470 470 240 314 36 310 400 400 400 30 422 236 384 371 371 278 278 314 196 242 242 457 309 479 331 84 84 496 88 88 89 446 203 393 155 332 332 332 245 129 259 74 74 278 278 278 325 41 324 324 324 186 162 232 68 68 115 470 470 171 171 252 143 96 196 479 331 307 307 61 167 457 36 377 87 87 14 145 145 376 460 460 169 150 86 105 221 458 208 495 467 467 475 475 475 475 475 301 399 70 138 138 138 138 372 245 245 129 321 208 441 151 151 151 151 169 99 238 238 310 107 60 298 298 298 379 471 471 270 160 427 247 247 126 126 292 292 292 292 23 23 408 408 408 408 391 321 321 373 373 400 400 400 400 422 162 342 115 470 470 240 285 34 111 111 111 438 399 70 65 65 151 150 150 86 6 34 202 202 202 280 145 145 486 460 460 169 150 86 238 272 161 382 467 44 44 44 38 164 401 321 164 180 180 486 315 315 450 450 169 269 9 168 242 116 64 131 34 106 297 297 297 293 293 42 42 147 380 288 443 443 240 325 325 41 41 19 19 454 454 414 321 247 312 126 126 292 292 292 1 23 23 408 408 149 149 228 321 321 209 287 111 111 111 438 31 342 342 494 494 494 129 74 496 496 496 496 496 368 368 453 168 180 111 111 111 111 111 438 58 72 72 110 110 486 486 486 460 460 388 64 314 401 321 108 108 119 374 374 374 374 374 132 132 132 8 8 354 159 159 159 159 314 229 247 247 126 126 326 326 326 326 326 326 326 326 326 101 101 101 149 228 321 373 72 72 268 268 268 268 268 88 430 430 430 430 219 219 152 152 416 144 27 106 88 350 360 135 339 212 87 87 349 349 261 25 480 480 480 85 299 299 299 64 212 384 180 180 486 113 240 285 285 255 255 8 354 180 113 113 113 113 167 167 164 164 164 214 214 214 200 200 471 49 342 9 118 118 118 280 30 30 30 422 239 371 180 84 350 350 413 285 34 145 145 460 460 169 150 86 142 221 336 208 441 151 151 151 151 169 99 447 238 6 310 60 60 298 298 275 379 471 471 471 49 269 433 160 160 112 112 56 56 170 28 28 491 2 2 2 2 289 321 373 373 326 326 326 326 326 326 101 149 149 228 321 321 83 55 322 322 399 250 181 181 181 181 181 35 401 401 401 401 321 384 371 180 71 71 71 71 368 368 453 342 86 221 196 196 473 476 476 476 143 143 401 401 198 22 283 455 455 42 147 380 380 288 496 496 496 274 37 77 77 323 142 397 336 147 147 380 288 120 120 120 37 24 24 419 439 439 78 417 417 170 170 170 170 28 28 28 28 491 362 491 491 362 362 491 362 491 362 491 362 362 491 40 40 40 40 40 40 40 366 366 366 366 316 316 249 7 7 7 7 7 7 7 7 7 7 7 7 364 364 276 276 109 109 84 443 139 139 139 293 293 413 122 309 479 331 331 315 315 315 315 315 450 450 450 16 16 293 98 98 13 13 13 13 13 78 491 170 312 312 126 23 23 23 23 260 260 391 163 316 491 316 289 373 225 225 225 442 287 287 287 287 287 287 111 111 111 438 438 24 325 34 84 242 116 479 331 84 84 84 84 16 16 16 16 375 98 98 98 263 13 13 13 78 47 47 47 491 47 80 80 321 80 373 66 66 172 179 179 179 179 314 196 196 70 65 329 329 460 460 169 349 352 25 485 485 485 485 485 374 132 98 98 13 417 417 417 170 421 421 491 421 421 491 128 128 128 491 128 128 128 128 128 193 193 17 +103-1241-0040 103 689 1 12 13 30 11 9 20 25 27 31 22 27 29 16 14 19 24 4 21 5 25 15 32 5 25 12 13 25 1 38 35 11 12 13 30 1 9 5 33 15 13 24 8 1 33 6 22 19 26 33 36 24 5 10 1 29 20 29 5 23 14 6 23 38 20 40 33 13 23 19 26 24 20 8 11 36 1 38 35 11 39 36 30 4 12 14 8 11 19 11 5 25 33 6 22 1 19 16 39 36 31 15 31 27 8 23 31 33 3 29 1 8 22 4 25 31 33 3 29 38 13 25 8 24 15 22 5 29 24 8 24 8 25 11 33 36 19 33 6 23 12 27 19 33 31 11 19 16 5 22 5 23 33 1 
24 4 34 39 36 1 9 3 2 1 2 2 2 5 4 4 3 3 2 3 2 6 3 3 4 2 2 5 6 3 2 3 4 5 3 5 2 4 2 2 8 31 3 1 2 1 2 3 4 1 3 5 3 2 2 3 3 3 5 9 8 3 3 2 2 3 6 3 2 3 2 2 4 2 2 3 2 3 4 6 3 12 18 3 2 2 2 3 4 4 2 4 4 3 3 1 2 2 5 7 7 19 5 4 2 2 6 5 5 4 2 4 4 3 8 5 22 6 8 5 4 4 3 4 3 2 1 2 4 4 3 4 3 2 3 4 5 4 3 1 3 3 4 3 3 2 3 5 2 3 4 2 3 3 2 3 3 5 6 40 6 7 6 2 10 9 17 17 17 363 363 363 149 149 228 321 127 114 0 0 313 313 35 354 420 420 420 301 10 479 331 231 231 231 274 186 162 482 482 105 6 144 496 496 496 215 457 393 205 155 332 332 332 216 448 448 448 464 255 399 473 65 486 486 460 240 24 310 395 469 242 116 94 418 418 418 418 418 418 99 436 436 60 60 298 298 116 33 394 466 127 114 361 361 361 282 388 303 117 48 13 80 80 80 321 7 364 345 430 430 430 314 35 401 198 127 114 114 264 264 264 59 59 452 452 13 229 247 247 312 312 312 292 292 292 292 292 12 21 1 21 21 21 21 21 23 101 101 149 149 391 316 316 316 73 73 289 289 321 320 159 159 285 34 430 430 430 399 70 65 111 438 438 143 36 108 119 351 405 405 405 206 178 192 192 176 135 135 248 248 465 377 377 374 374 132 399 70 383 383 383 383 383 385 35 310 310 107 447 97 427 56 56 47 491 187 80 491 80 289 321 320 74 485 213 213 252 215 354 29 302 302 175 175 81 353 353 353 353 467 467 297 297 297 293 43 345 109 469 281 342 342 6 36 119 351 351 139 139 175 81 176 135 200 248 248 429 429 429 429 464 464 111 111 111 111 438 438 239 75 371 371 374 374 374 374 132 132 132 98 98 13 414 247 312 312 126 187 292 12 12 12 12 23 23 23 101 149 149 228 321 321 320 345 430 389 236 310 107 152 152 152 378 42 147 380 180 486 486 460 240 216 300 300 495 406 467 111 111 111 438 314 36 371 371 278 278 314 242 242 242 457 457 108 108 119 437 437 405 405 405 405 206 206 215 35 458 192 419 427 229 247 312 126 292 292 23 1 408 408 408 149 228 228 316 316 80 80 289 321 209 188 118 118 118 118 118 261 219 152 152 152 152 186 162 232 172 115 470 470 403 171 171 422 162 342 342 273 273 84 16 88 106 284 481 293 293 293 150 162 232 86 238 272 371 180 106 284 405 405 405 206 206 215 215 233 352 419 13 229 82 312 187 187 187 187 47 47 47 47 491 491 491 316 491 316 80 80 435 435 435 435 209 111 111 111 438 143 458 445 445 445 351 351 351 365 365 365 365 330 388 64 64 77 77 342 68 238 6 272 180 405 405 405 215 215 35 402 345 409 409 94 199 111 111 111 438 399 217 473 473 476 476 476 143 458 192 180 230 230 215 35 35 70 70 46 46 46 438 438 399 217 70 65 480 480 480 480 299 299 339 394 465 108 377 123 123 123 88 277 277 277 277 385 131 34 106 297 297 297 293 216 114 114 84 84 88 88 177 177 177 143 36 77 342 86 238 6 336 371 490 490 490 349 349 261 469 469 469 458 144 27 100 100 100 375 375 122 122 227 227 419 427 56 56 491 312 312 312 12 12 12 12 12 12 12 12 12 12 260 260 260 260 260 491 163 163 163 366 491 366 491 316 491 366 366 491 40 40 40 40 316 289 321 321 289 7 7 217 217 473 486 486 486 460 460 460 169 164 164 164 164 219 219 219 485 477 477 374 374 132 132 98 13 417 417 417 417 237 421 421 128 128 193 17 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/hidden_unit/dict.km.txt b/SpeechT5/SpeechLM/dataset/LibriSpeech/hidden_unit/dict.km.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/hidden_unit/dict.km.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 
38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 409 +410 410 +411 411 +412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 
+447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 +487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/hidden_unit/train_sample100.km b/SpeechT5/SpeechLM/dataset/LibriSpeech/hidden_unit/train_sample100.km new file mode 100644 index 0000000000000000000000000000000000000000..dc3336034c88c053523a9e5076515ce4d56c19b6 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/hidden_unit/train_sample100.km @@ -0,0 +1,100 @@ +17 17 17 296 296 317 317 317 317 317 491 461 461 461 461 461 461 491 491 184 184 184 289 310 107 107 395 351 486 486 460 215 215 35 96 272 300 382 382 245 43 364 276 174 174 174 174 319 282 282 388 303 303 117 404 404 439 439 225 225 225 225 225 225 225 491 391 391 47 491 73 80 289 7 7 217 473 258 258 258 31 342 224 494 494 494 368 281 9 142 142 147 147 329 329 329 329 329 329 36 310 107 395 395 302 302 497 497 251 251 241 241 431 329 432 330 330 388 195 195 64 212 212 131 483 226 226 226 209 356 356 356 356 31 162 68 224 224 494 494 215 129 74 190 190 499 499 499 265 265 85 85 85 85 207 318 185 185 433 433 86 6 6 227 419 417 417 417 237 237 237 237 237 237 237 237 237 237 237 237 362 491 362 305 40 491 305 40 40 362 362 40 40 40 40 40 40 40 40 218 491 218 218 218 491 305 218 491 218 218 218 218 218 218 491 218 435 491 218 491 218 218 218 491 218 218 491 369 491 369 369 369 369 369 21 21 21 21 21 21 21 21 408 408 408 149 228 228 491 289 320 7 473 258 258 258 258 342 342 224 494 494 494 494 31 9 9 142 397 147 147 329 329 329 329 329 143 36 107 107 395 302 302 497 497 251 251 251 241 241 431 278 278 278 278 330 388 388 195 195 195 243 212 131 419 439 225 225 225 80 491 80 7 7 251 241 431 278 278 278 173 173 402 402 401 401 401 401 401 491 310 107 395 395 180 151 151 151 169 150 150 86 86 238 6 272 397 133 345 109 109 109 264 264 313 216 216 22 448 448 448 14 14 14 145 145 145 486 460 460 460 173 280 29 242 242 116 33 250 250 251 241 81 444 324 324 324 324 324 301 339 217 217 217 217 217 473 65 290 290 290 290 290 434 434 339 339 33 250 250 42 42 147 147 380 288 84 496 496 496 496 496 274 274 37 24 131 404 439 78 414 80 80 80 80 80 80 80 401 384 371 278 278 278 215 35 35 96 401 401 401 401 401 401 401 401 401 239 384 371 180 315 315 315 315 315 450 450 413 413 94 199 340 340 33 76 465 377 123 123 123 88 88 44 44 44 251 251 241 431 278 278 285 285 302 302 497 497 497 58 72 72 72 437 481 481 481 481 481 481 175 175 81 84 84 84 496 274 98 98 229 247 247 126 126 126 326 326 326 326 326 101 101 149 228 491 373 393 234 234 155 190 190 487 288 288 278 330 339 64 64 212 310 447 447 6 272 472 345 333 333 220 220 164 14 14 411 411 284 481 481 481 293 293 122 122 384 300 334 334 304 304 304 49 269 342 168 89 89 89 446 33 33 250 251 251 241 431 470 171 171 171 252 252 325 34 41 324 324 318 368 368 342 9 219 485 286 286 382 382 313 236 239 161 161 79 499 499 405 405 206 215 215 233 270 270 433 342 224 89 89 322 67 394 76 465 161 161 492 492 492 8 8 280 498 498 498 498 498 396 186 39 54 238 6 272 472 336 336 62 62 62 62 62 146 464 44 44 44 8 32 401 354 190 190 380 380 499 496 496 496 178 233 233 458 192 419 427 247 247 15 193 193 17 +17 17 363 363 51 51 228 320 127 45 45 45 
385 131 58 72 72 110 110 110 110 486 460 240 240 325 34 154 154 154 457 478 478 232 232 482 482 172 115 273 273 153 153 153 372 372 396 396 186 186 54 54 172 224 273 255 255 43 364 364 276 109 109 403 403 403 403 403 207 246 324 301 301 129 401 354 354 180 376 376 376 460 178 178 458 192 192 242 340 116 466 466 22 283 455 43 364 364 276 276 153 153 496 496 37 37 24 77 270 342 224 69 69 130 130 198 22 448 448 448 464 180 424 424 424 424 424 274 122 131 472 221 401 82 144 27 437 151 151 169 169 164 164 472 221 401 259 29 380 382 396 313 385 35 472 401 259 74 425 425 386 343 343 343 343 343 358 358 39 39 433 433 160 160 160 112 427 56 56 491 312 312 341 341 341 341 341 341 12 12 12 21 21 21 21 21 21 21 21 21 408 408 408 408 391 391 228 491 491 412 177 177 177 177 177 131 133 345 141 141 141 281 453 142 397 456 456 456 456 129 259 74 485 485 485 485 374 374 325 449 449 191 191 191 314 314 36 377 87 87 8 8 420 420 420 324 464 44 44 44 94 335 335 411 411 188 121 121 33 64 76 465 465 161 161 487 469 469 143 458 192 192 278 278 278 37 314 131 472 72 72 72 72 72 72 110 110 443 120 240 314 314 26 26 26 251 241 431 235 235 235 235 235 413 200 200 248 248 248 212 354 190 380 380 499 496 496 496 178 233 458 192 192 340 340 340 94 199 154 154 77 342 342 142 14 411 498 498 498 498 498 134 175 81 166 324 324 464 382 382 245 129 458 208 208 441 441 441 153 153 372 372 396 186 186 323 323 238 6 272 377 487 487 374 313 216 216 114 124 124 124 274 274 368 269 9 142 397 336 276 109 109 496 496 496 37 37 37 24 270 270 433 160 427 229 247 247 126 126 326 326 326 326 326 101 101 149 149 228 228 491 345 333 333 333 220 220 164 402 221 401 401 401 491 384 371 180 106 306 306 306 306 396 396 178 178 35 458 96 96 66 66 68 68 68 68 115 115 444 213 213 213 143 458 208 208 487 487 288 277 385 143 270 270 342 224 69 462 462 130 402 402 401 401 491 74 190 441 441 441 153 153 182 182 182 182 182 497 175 175 81 89 89 446 116 33 131 472 221 458 445 445 351 351 486 486 460 460 169 150 342 342 86 105 336 445 445 470 403 403 171 171 171 246 246 252 24 131 404 439 78 170 305 491 28 28 28 491 491 491 2 201 305 305 491 305 305 2 316 316 316 316 316 491 491 289 289 289 320 354 159 159 159 159 159 240 35 131 472 221 336 354 62 62 62 62 62 438 216 22 283 455 236 108 119 119 103 103 103 103 103 85 299 203 53 473 177 177 143 131 133 133 147 380 288 213 213 213 252 143 310 447 447 447 26 26 251 251 241 81 329 329 329 330 388 195 195 471 471 49 453 142 58 72 72 437 437 481 481 481 481 293 175 175 81 84 84 84 84 84 16 274 274 98 483 483 440 188 177 177 177 131 133 133 345 141 141 141 281 9 168 44 44 143 458 208 208 441 441 441 346 346 265 265 85 85 85 146 146 277 277 277 385 385 227 419 225 225 226 197 7 364 276 109 109 139 139 293 293 122 143 458 144 27 27 121 116 33 33 212 239 371 180 151 151 151 178 35 96 96 36 272 191 191 191 37 314 26 251 241 431 431 278 285 285 302 302 497 497 186 162 482 482 338 238 161 79 487 288 288 360 360 434 434 434 203 381 381 404 13 491 247 15 193 193 193 17 +17 17 17 363 363 51 51 228 491 373 155 155 155 148 148 387 372 313 10 479 479 307 307 307 307 61 167 449 449 34 357 357 357 357 357 173 280 29 242 116 94 199 44 44 44 8 129 401 259 354 190 190 380 380 499 496 496 496 167 233 233 144 192 419 419 439 225 225 225 80 80 491 491 144 389 389 389 389 389 133 133 42 147 147 380 499 319 319 319 348 348 195 394 90 76 74 74 437 311 311 311 311 311 311 460 169 150 342 86 6 6 196 217 473 258 258 258 31 342 224 494 494 368 281 9 142 397 147 329 329 329 329 329 36 310 107 302 302 302 497 497 251 251 251 241 431 329 
329 330 116 33 195 195 471 471 49 269 142 238 6 272 106 153 153 372 372 372 245 43 345 333 333 220 220 216 180 113 113 113 113 167 167 236 239 401 384 219 485 485 374 374 132 132 42 147 456 456 456 456 416 144 27 106 306 306 306 306 306 306 396 313 24 24 131 472 393 155 332 332 332 313 236 239 239 384 371 213 213 213 252 186 39 342 342 11 11 11 379 379 379 394 76 478 66 68 68 115 267 41 41 41 246 3 464 464 89 194 446 446 446 64 212 239 384 490 490 143 458 144 208 441 441 153 153 153 372 372 372 467 467 467 275 203 381 381 48 404 13 491 491 312 312 312 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 408 408 408 408 149 149 228 491 289 412 177 177 177 177 177 35 401 259 74 190 190 488 488 488 488 488 8 29 134 134 134 134 8 8 359 359 166 166 166 324 301 378 345 141 141 141 281 9 142 221 144 27 27 437 370 370 370 370 370 370 348 64 76 310 107 395 395 459 459 459 271 271 39 86 86 238 198 45 45 45 45 45 35 401 196 217 473 258 258 258 31 342 342 224 494 494 494 368 453 9 142 397 147 147 329 329 329 329 329 329 143 310 107 107 395 302 302 302 375 497 497 497 43 364 345 141 141 141 31 162 232 68 172 115 470 278 278 325 34 176 135 135 200 200 464 415 415 415 415 415 131 183 156 156 156 156 245 43 364 364 109 109 278 278 116 33 64 212 212 34 84 84 84 84 274 274 98 229 229 247 126 126 326 326 101 408 408 228 491 491 491 445 445 213 213 213 213 252 215 458 176 176 135 135 200 200 44 44 44 44 99 338 338 338 338 338 395 106 306 306 306 306 396 396 215 35 35 335 145 284 265 265 265 85 85 85 146 464 464 106 125 125 125 125 348 94 335 14 411 204 204 204 204 204 204 204 29 29 337 337 324 422 422 164 164 164 214 214 214 328 200 248 466 114 114 45 45 385 90 401 82 74 119 311 311 311 311 311 311 311 311 311 282 169 169 150 39 433 86 238 6 75 227 419 225 225 225 225 225 491 373 305 80 289 491 155 165 165 165 165 165 203 53 212 239 190 380 380 496 496 178 35 96 270 342 342 224 89 89 446 33 394 310 107 395 395 106 139 424 387 122 122 122 300 300 242 242 116 94 335 335 411 230 230 230 230 230 230 215 215 233 233 419 427 229 247 247 126 126 193 193 193 193 17 +17 17 17 363 363 51 149 228 228 209 83 194 194 194 322 322 67 212 127 45 45 45 45 240 240 325 118 118 118 118 118 402 338 400 400 400 30 301 301 10 479 331 84 84 496 274 252 36 449 459 459 459 31 342 86 86 6 272 483 483 411 475 475 475 475 475 475 475 475 349 164 164 214 214 214 214 200 248 14 14 411 287 284 284 284 426 426 426 206 206 206 24 335 335 226 157 157 157 157 157 245 14 14 411 145 113 113 113 113 285 285 34 462 462 130 402 401 401 491 74 425 425 386 386 431 343 343 343 343 358 358 358 358 358 39 433 433 433 160 427 247 247 247 126 126 292 326 326 326 326 326 408 408 149 228 491 373 338 338 400 400 400 400 301 378 43 345 389 389 389 314 314 196 309 309 479 331 463 463 463 463 280 29 382 245 245 42 42 147 380 380 288 443 443 120 169 169 150 39 433 86 86 86 6 6 272 34 89 319 319 348 394 76 108 377 139 139 139 139 293 186 99 338 400 400 400 30 3 58 254 254 254 314 131 393 234 234 261 25 470 264 264 468 468 468 396 313 143 449 449 191 191 191 325 180 180 113 113 113 113 113 167 314 314 401 401 198 22 283 455 455 43 364 364 276 346 346 346 265 265 265 265 85 85 85 146 146 318 318 368 453 342 168 89 89 446 116 212 131 133 43 364 276 109 109 264 264 264 468 245 245 349 234 234 155 25 148 148 148 372 372 304 304 49 9 9 221 198 127 114 114 264 264 468 406 406 467 467 106 284 284 426 426 206 206 37 173 352 352 352 352 419 439 439 237 237 237 491 491 491 28 491 491 491 491 341 341 341 341 341 341 369 369 369 369 369 369 369 369 369 369 369 369 
369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 369 260 491 163 163 163 163 163 163 316 491 316 316 73 289 289 289 127 114 0 222 468 353 353 353 353 215 35 259 74 425 425 386 431 432 330 330 348 76 449 41 324 464 462 462 462 402 221 259 74 351 213 213 213 252 252 129 354 100 100 100 497 497 335 335 440 188 188 340 116 94 199 145 145 145 486 460 460 173 280 29 242 242 116 33 250 250 359 359 474 474 474 324 246 19 3 3 14 14 209 145 194 194 446 446 388 64 212 335 14 411 145 145 113 113 113 450 413 285 34 223 223 223 280 280 277 277 277 277 277 233 75 227 427 229 247 312 292 292 326 326 101 101 149 391 228 491 491 373 489 489 489 489 143 458 144 389 389 389 94 199 255 255 236 36 108 119 119 351 432 432 432 330 388 195 64 131 472 472 221 458 144 208 208 425 386 386 496 496 496 274 186 186 54 54 86 26 26 474 474 166 301 143 36 377 123 123 123 114 222 222 222 222 313 10 10 479 398 290 171 171 252 215 458 29 382 382 304 368 269 342 142 221 336 354 278 278 278 368 453 342 86 196 196 94 459 459 459 271 31 342 342 221 221 336 354 62 62 62 62 62 438 438 143 36 384 371 371 278 278 330 33 64 76 108 449 69 223 130 402 196 479 331 255 154 416 458 208 386 431 151 151 151 178 458 96 36 272 176 135 135 200 248 248 127 114 222 222 222 406 406 467 467 350 350 350 350 350 350 413 413 303 48 404 13 229 491 491 312 15 15 15 193 193 193 17 +17 17 17 296 363 363 51 51 51 491 491 491 491 320 320 159 159 159 159 314 35 196 196 473 258 258 258 31 342 224 494 494 494 368 453 142 142 397 147 380 329 329 329 329 143 36 310 107 395 134 302 302 497 497 251 251 251 241 431 278 278 278 330 388 195 64 212 131 133 133 141 141 141 281 453 142 221 336 174 174 174 174 348 199 223 223 223 130 198 198 124 124 124 124 124 368 31 342 86 221 221 336 445 445 445 351 351 171 171 171 252 215 29 134 134 134 8 259 354 100 100 497 497 497 122 129 259 144 208 208 190 487 487 213 213 213 252 143 36 310 107 395 334 334 334 304 304 185 49 269 342 224 224 489 489 489 143 144 27 389 389 116 33 250 217 217 473 365 365 365 330 94 199 469 469 469 24 36 310 447 447 447 6 127 222 222 222 245 245 14 411 411 350 350 350 350 413 64 394 465 465 27 27 121 116 33 394 478 478 232 172 224 273 470 498 308 308 467 299 388 379 471 471 49 342 168 89 194 194 446 322 64 212 198 114 114 84 496 496 274 318 49 269 342 224 69 462 130 129 402 106 493 493 493 216 300 300 382 245 349 205 261 261 25 496 496 496 496 274 274 233 96 270 433 342 168 340 340 116 33 36 377 123 123 216 283 455 8 354 106 306 306 306 306 396 396 416 416 192 192 275 275 116 303 303 48 48 229 170 491 491 312 312 312 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 260 408 408 408 391 391 391 228 491 491 373 338 400 400 400 30 378 378 141 141 141 281 342 342 44 44 44 94 331 84 84 496 496 285 449 134 134 8 100 100 497 497 58 72 72 268 268 268 268 268 169 186 39 54 142 397 397 276 346 346 346 428 85 146 146 358 352 352 352 352 352 352 417 417 417 417 237 491 435 225 225 225 72 156 156 156 156 245 245 43 364 276 276 109 109 498 396 396 178 143 259 458 208 345 141 141 281 281 9 168 106 297 297 297 297 297 297 43 43 345 109 109 171 171 368 368 342 342 221 336 371 180 180 319 319 319 319 282 388 195 195 195 117 404 335 440 209 83 194 194 194 194 446 446 64 212 131 133 364 364 276 109 109 443 139 139 139 293 293 497 122 239 36 371 180 319 319 319 319 282 303 303 303 303 117 404 439 439 439 78 237 47 47 47 80 491 80 491 373 373 338 338 338 400 400 400 30 30 246 246 246 3 3 197 197 7 42 147 147 147 380 210 210 210 210 486 365 282 282 282 388 388 195 195 199 404 404 197 
197 216 22 283 283 455 38 162 482 115 273 273 84 496 88 88 176 176 176 328 200 248 478 66 172 115 273 498 498 498 245 143 458 458 302 302 302 302 375 98 98 229 247 247 15 15 193 193 193 17 +17 17 17 363 363 363 51 51 228 491 373 72 110 110 139 139 139 293 293 215 35 96 96 6 472 472 133 42 147 380 499 499 319 319 319 348 195 195 466 22 283 283 38 162 68 68 68 273 273 319 319 319 348 33 64 212 212 93 93 93 93 171 422 186 39 86 86 105 105 336 208 153 153 153 153 182 182 375 375 497 98 98 483 440 83 83 55 55 55 322 67 212 131 133 345 141 141 141 141 281 9 198 198 22 283 455 38 162 482 482 482 238 6 161 161 499 499 235 235 235 235 348 64 212 459 459 459 459 31 54 86 6 272 472 221 336 259 190 190 190 488 499 499 405 405 206 215 215 35 29 69 69 223 130 198 198 22 283 455 236 129 36 310 107 395 395 487 498 498 498 396 178 36 310 107 447 483 226 226 209 411 171 171 171 171 252 252 143 77 478 342 224 494 494 494 31 342 342 115 273 470 265 265 265 85 85 85 146 469 469 469 36 449 41 41 41 324 324 3 335 440 145 194 194 446 446 67 76 90 393 393 234 261 25 148 148 148 148 372 372 467 467 467 242 116 33 250 217 217 473 473 278 278 99 436 436 60 60 298 379 379 195 471 471 49 49 168 106 106 405 167 215 35 458 96 368 453 453 371 278 278 139 175 81 324 324 219 495 495 495 495 467 41 41 41 41 19 454 454 229 491 491 312 312 312 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 408 408 408 149 228 491 491 320 219 357 357 357 240 240 131 133 133 345 333 333 220 220 216 106 106 297 297 297 297 297 297 293 293 293 122 4 127 114 258 258 258 258 258 271 271 39 433 433 433 433 160 160 160 97 97 225 225 225 225 7 217 473 258 258 258 258 342 342 224 494 494 494 368 9 142 142 397 147 380 329 329 329 329 329 310 107 107 395 302 302 497 497 349 349 205 261 261 25 315 315 315 450 450 413 413 212 131 335 226 209 44 44 44 236 8 32 259 354 180 319 319 319 348 348 64 64 64 64 212 384 34 11 11 11 116 33 243 243 401 401 491 108 119 119 437 103 103 103 103 103 85 85 299 299 339 64 76 36 87 87 66 66 68 68 115 278 278 278 36 131 393 393 155 155 332 332 332 14 14 14 411 145 145 284 315 315 450 450 153 88 372 372 304 304 185 49 453 168 415 415 415 415 58 183 156 156 156 313 143 458 458 445 351 278 278 278 36 310 107 107 395 242 116 116 250 250 250 276 109 109 278 330 116 33 64 212 212 371 84 84 84 84 274 274 263 229 247 247 126 126 326 326 326 326 326 101 101 149 228 228 289 320 309 309 479 278 278 278 325 449 449 176 176 135 328 200 200 195 248 197 197 197 401 491 144 27 27 437 437 405 405 405 206 167 35 35 242 242 242 33 33 33 250 250 364 276 276 153 153 372 372 372 215 215 35 472 472 221 401 491 208 208 441 441 441 441 109 278 139 139 139 375 375 375 233 233 270 270 433 433 160 160 18 112 112 439 439 439 225 237 237 237 47 491 47 491 491 73 491 373 373 338 400 400 400 30 30 58 110 254 254 254 314 196 196 479 331 278 278 278 325 449 191 191 191 314 314 478 478 68 68 115 273 278 278 143 96 96 232 68 68 68 6 272 371 444 360 360 252 339 199 199 223 223 130 402 198 198 114 57 57 57 203 381 381 48 229 491 247 15 193 193 193 17 +17 17 17 363 51 51 228 491 412 83 145 253 253 253 253 368 342 168 168 145 145 486 460 460 173 280 29 242 242 242 359 359 359 81 324 324 324 3 58 72 268 268 268 268 268 268 274 186 39 54 86 105 336 445 485 485 213 485 215 129 354 29 334 304 304 185 131 397 397 345 347 347 347 347 43 43 364 276 174 174 426 426 206 167 457 76 36 377 87 87 87 236 259 108 119 119 351 351 443 139 139 139 293 175 175 81 89 340 340 116 33 335 14 14 491 411 411 284 284 284 405 405 405 206 206 206 37 24 131 133 4 4 280 153 153 
343 343 343 343 358 358 39 342 342 224 50 50 50 50 50 50 185 269 433 160 112 427 82 247 312 126 292 292 292 326 326 326 326 326 101 408 149 149 491 412 412 55 55 55 322 67 131 472 221 458 445 445 213 213 213 213 252 215 458 176 176 135 135 135 200 200 44 44 44 44 99 338 338 338 338 395 273 106 306 306 306 396 396 215 35 35 335 14 14 411 284 265 265 265 265 85 85 146 464 464 125 125 125 125 466 466 22 283 455 399 217 217 217 473 290 290 290 290 290 434 434 434 339 339 33 90 42 42 147 147 380 380 288 496 496 496 496 274 274 274 24 131 472 198 198 127 45 45 385 90 221 458 208 208 190 499 499 499 405 405 206 150 150 54 86 238 6 6 472 472 198 22 283 455 38 72 72 437 437 481 481 481 481 175 175 81 84 84 84 84 274 274 98 229 247 247 126 126 326 326 326 326 326 101 149 149 228 491 83 83 55 55 322 67 67 131 133 133 364 276 276 346 346 486 315 315 315 315 450 450 450 413 413 348 64 212 131 230 230 230 230 230 35 35 401 198 198 22 283 455 38 162 232 232 232 68 68 6 371 371 213 213 213 252 215 129 259 29 29 42 42 42 147 380 288 443 443 443 240 314 131 183 183 183 183 183 278 278 278 139 139 139 497 497 497 497 122 259 259 354 420 420 324 464 180 180 426 426 426 426 426 282 388 303 303 64 212 465 227 419 439 78 491 305 421 491 491 491 421 491 421 491 491 491 128 128 128 491 128 193 193 193 17 +17 17 17 363 363 363 363 51 149 228 491 491 411 145 475 475 475 475 94 475 475 475 324 301 8 354 106 493 151 240 325 41 41 324 324 3 183 183 489 489 489 489 489 43 43 276 109 109 443 330 330 348 64 76 465 449 483 145 113 113 113 113 113 240 285 285 34 223 223 130 280 277 277 277 277 277 385 36 36 227 419 225 225 226 226 226 491 209 157 157 157 157 157 372 335 14 14 411 188 340 340 116 33 64 394 465 108 377 123 123 123 88 88 277 277 277 277 385 24 131 427 229 247 126 126 126 326 326 326 101 408 149 491 228 373 110 110 110 254 254 240 314 35 108 377 87 87 87 129 259 74 311 311 311 311 311 311 311 311 169 150 342 342 342 168 106 410 410 410 410 410 29 29 382 313 216 216 114 92 92 92 92 92 385 131 472 183 183 183 351 278 278 139 139 139 497 497 497 497 42 42 8 147 380 380 499 84 496 496 496 496 274 274 274 37 24 131 419 419 225 225 225 225 82 83 55 55 55 322 67 394 478 478 232 232 172 172 115 273 84 84 84 84 16 16 16 274 274 274 98 13 229 247 312 126 126 23 23 23 101 101 101 149 149 228 491 289 289 7 147 147 380 499 319 319 319 348 466 466 466 212 22 448 448 448 14 14 145 319 319 319 319 348 195 195 195 394 478 478 232 68 68 68 267 267 267 267 267 434 339 339 33 90 90 32 465 144 27 180 284 405 426 426 413 348 64 76 26 26 26 359 81 81 277 277 385 325 34 69 223 130 130 402 196 196 217 473 473 258 258 31 342 224 494 494 494 494 368 9 142 142 42 42 147 380 329 329 329 329 252 143 36 107 107 395 302 302 302 497 497 185 269 9 9 483 14 411 411 297 297 297 297 297 297 293 293 497 186 162 68 68 172 115 267 267 267 267 360 360 176 176 176 135 328 328 200 199 106 106 265 265 265 265 85 85 85 85 207 207 19 454 13 417 417 417 237 237 170 28 28 28 28 28 362 491 491 362 362 362 362 491 491 362 211 491 491 369 369 369 369 21 21 21 21 21 21 21 21 21 21 21 21 21 21 408 408 408 408 391 391 73 491 289 373 338 338 400 400 400 301 378 378 141 141 141 281 162 68 68 115 470 278 278 278 449 176 176 135 135 200 248 248 127 114 264 264 264 468 245 245 43 364 276 174 174 174 174 348 94 199 145 145 460 460 460 402 402 6 272 300 469 313 313 10 479 398 398 374 374 132 413 339 199 199 340 116 116 199 335 14 411 411 498 498 498 498 134 175 359 81 166 324 324 422 236 36 310 107 395 395 485 374 374 374 374 132 132 413 303 303 303 117 404 439 439 78 
237 237 237 47 47 491 47 2 491 2 2 2 2 316 316 491 316 316 491 491 73 435 289 7 127 5 5 5 38 162 68 68 68 115 273 319 319 319 319 348 348 195 250 133 141 141 141 281 342 86 221 458 144 27 351 319 319 319 53 53 176 135 135 200 200 200 464 340 340 340 94 199 415 415 415 35 198 22 283 455 455 364 345 109 278 278 330 116 33 64 212 212 371 84 84 84 84 274 274 274 274 43 43 401 364 276 276 153 153 153 387 387 372 372 396 396 203 53 473 89 446 446 67 131 472 221 401 354 190 380 380 499 499 428 428 146 146 358 358 233 233 227 419 419 427 56 491 421 15 15 15 193 193 193 17 +17 17 17 363 363 363 363 51 51 51 228 491 320 127 448 448 448 14 14 411 153 153 387 372 372 396 313 35 310 107 395 382 382 313 313 285 34 125 125 125 125 348 466 22 283 283 38 162 232 232 232 26 26 26 431 431 84 496 496 274 274 457 457 401 401 354 354 255 255 251 251 251 241 431 84 84 84 16 16 274 274 216 216 283 283 455 58 72 72 72 268 268 268 268 268 268 450 450 274 271 271 39 39 86 142 397 336 345 141 141 281 281 342 168 340 340 116 199 44 44 44 129 259 190 190 380 380 499 499 428 85 146 146 285 34 302 302 497 497 349 349 234 234 234 234 234 261 425 425 386 431 151 151 151 169 169 169 99 436 338 338 447 395 69 462 462 402 402 221 401 259 491 74 351 351 360 360 360 200 200 248 76 76 465 445 485 324 324 324 301 378 364 364 346 346 346 428 428 146 146 143 36 472 221 401 401 259 354 425 425 241 431 374 374 374 374 132 132 132 203 381 381 404 13 491 247 312 126 126 326 326 326 326 101 101 101 149 228 491 491 373 72 72 437 284 319 319 319 203 53 53 53 53 469 212 212 131 34 410 410 410 410 410 173 280 29 29 382 245 245 8 259 354 62 62 62 62 146 464 464 44 44 399 217 217 217 473 286 286 286 468 468 406 337 337 337 324 464 464 277 277 325 34 462 462 462 402 402 221 401 401 354 213 213 213 213 213 246 246 246 246 318 318 185 185 433 433 433 160 160 112 112 78 56 491 491 28 28 491 491 341 341 341 341 12 12 12 12 12 260 260 260 260 391 391 391 73 289 491 289 108 119 437 437 284 284 426 426 203 53 473 459 271 31 39 342 342 26 26 251 251 241 81 329 120 120 330 388 195 195 195 64 212 131 419 439 439 439 439 225 225 225 237 47 491 47 80 80 491 80 197 225 287 287 44 44 44 399 217 217 473 398 213 213 213 143 143 458 144 26 26 251 241 431 278 278 285 449 302 302 497 497 399 399 217 217 473 136 136 136 136 136 136 136 282 282 388 195 404 58 489 489 489 489 489 399 53 335 14 145 145 145 486 460 460 173 280 29 242 242 116 250 359 359 81 324 324 324 422 129 259 74 485 213 213 213 213 252 215 129 259 354 100 100 100 497 497 122 143 458 144 27 437 481 481 481 481 481 293 293 122 122 472 133 42 147 147 380 329 329 171 252 143 36 107 395 302 302 302 497 497 497 251 251 251 241 81 431 278 278 330 388 379 195 195 471 471 77 269 342 142 72 72 72 437 151 151 151 368 453 342 142 221 336 354 275 275 275 275 303 303 195 243 131 419 427 491 247 126 126 126 292 326 326 326 326 326 326 326 326 326 101 101 149 149 228 320 345 141 141 281 162 232 232 172 172 115 273 84 496 88 88 88 176 176 135 135 200 248 183 183 257 257 257 257 453 342 26 26 251 241 241 431 171 171 171 252 457 457 401 259 108 119 119 351 308 308 308 313 313 94 199 469 469 215 35 96 66 68 68 68 115 115 444 444 213 246 252 252 325 34 125 125 125 125 466 466 22 283 455 58 72 72 351 278 278 139 139 293 497 497 349 349 234 234 261 25 485 485 485 464 139 139 375 497 497 122 122 36 472 221 336 354 420 420 324 464 464 180 106 426 426 426 426 413 348 64 212 212 198 22 283 455 8 354 354 106 284 306 306 306 306 306 396 396 396 37 303 303 48 404 78 229 491 491 15 15 193 193 193 17 +17 17 17 363 363 363 51 51 
51 228 491 7 217 473 258 258 31 342 342 494 494 494 281 9 142 397 147 329 329 329 329 143 310 107 302 302 302 497 122 10 10 309 398 398 398 398 398 374 132 216 216 127 45 45 45 325 183 451 30 30 30 3 14 14 411 284 284 405 405 405 206 206 167 24 227 227 472 221 401 491 354 420 420 422 143 458 144 27 351 351 151 253 368 368 99 338 338 338 400 400 400 400 30 3 58 58 110 254 254 254 254 58 58 72 72 110 498 498 498 498 396 313 325 183 183 57 57 57 203 53 394 90 76 108 108 119 351 139 139 139 139 293 293 215 35 74 74 329 329 213 329 252 325 300 382 382 245 399 217 70 65 65 153 329 372 406 406 467 313 186 39 342 342 224 242 242 116 466 466 22 283 448 448 14 411 213 213 213 213 173 173 402 196 196 176 328 328 248 248 8 354 255 255 38 349 205 234 261 148 148 148 148 148 148 372 372 372 59 452 335 197 226 226 209 188 188 340 340 340 340 33 195 117 117 117 197 197 197 80 491 80 491 491 7 7 7 364 345 109 329 139 329 81 219 219 485 464 464 203 203 33 394 212 465 107 395 329 329 329 171 171 171 301 301 8 129 354 425 175 175 431 329 329 264 468 468 304 313 186 162 323 482 482 482 238 6 272 106 153 153 153 182 372 372 372 372 59 245 335 14 209 411 410 410 410 410 410 410 173 29 29 495 406 467 415 415 131 90 259 144 27 437 437 306 306 306 306 396 203 53 469 469 469 325 325 41 41 41 19 19 454 229 247 126 126 126 326 326 326 326 326 326 101 149 149 228 289 491 127 45 45 45 45 240 183 183 183 451 30 30 30 301 399 217 473 432 432 432 330 348 64 457 401 82 108 377 87 87 38 162 323 323 115 273 84 84 496 274 274 58 58 183 257 257 257 31 9 238 6 119 161 308 308 308 396 313 94 199 199 459 215 215 96 66 342 172 224 41 41 324 3 301 314 198 22 283 455 116 199 331 443 443 178 178 458 96 86 238 6 272 145 145 460 460 460 402 402 6 272 300 469 313 10 94 398 398 374 374 374 132 413 303 303 48 404 13 170 491 491 491 312 15 15 292 292 292 193 193 193 193 17 +17 17 17 17 363 363 363 363 363 363 363 408 51 51 228 491 289 320 74 329 329 329 329 329 325 34 334 382 382 467 110 254 254 254 285 34 145 145 145 376 460 460 169 150 342 86 105 96 96 272 57 57 57 203 53 255 255 255 130 402 221 259 208 441 441 153 153 372 372 372 59 271 271 269 54 54 9 97 336 155 155 332 332 332 245 399 473 65 329 329 329 460 169 164 164 485 485 485 374 132 143 259 144 27 437 329 329 329 169 164 164 142 221 336 29 495 334 59 59 313 24 131 58 72 110 254 254 254 254 35 35 196 309 309 479 331 463 463 463 463 29 382 382 245 8 129 354 137 137 137 137 33 10 10 309 331 331 84 84 350 350 413 413 33 394 465 377 377 87 123 132 8 354 354 106 284 481 481 481 175 175 81 242 116 33 394 465 465 108 119 485 485 286 286 468 406 467 467 121 53 394 155 155 25 469 469 203 217 473 418 418 418 418 418 99 436 436 60 60 298 199 255 255 8 180 113 113 113 113 240 285 131 335 14 401 209 411 475 475 475 475 475 475 475 475 422 164 164 164 214 214 214 214 328 328 200 200 248 335 188 188 340 340 94 199 199 257 257 257 257 342 9 142 437 424 424 424 424 424 497 497 122 251 241 431 431 265 265 428 428 85 146 146 358 358 352 352 352 352 352 352 352 112 427 56 491 312 312 312 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 408 408 408 408 149 149 228 289 209 83 55 55 322 67 212 90 219 357 357 357 357 357 357 120 120 240 385 385 35 227 227 227 419 419 439 225 197 47 47 491 491 47 491 47 47 80 491 491 491 289 7 373 451 451 451 286 286 286 286 286 468 468 245 43 43 345 141 141 281 281 9 142 196 217 473 65 486 486 460 460 169 164 164 164 485 485 485 374 132 143 259 144 144 27 437 329 329 329 329 150 164 164 105 221 336 354 29 498 498 59 59 313 385 227 419 427 
229 247 408 149 149 228 226 491 209 83 415 415 415 240 314 335 58 72 72 72 72 72 110 110 486 486 460 460 460 169 352 352 402 221 401 259 74 311 311 311 311 311 311 311 169 150 86 86 238 6 272 472 472 234 164 164 487 487 487 288 288 213 213 246 246 3 335 440 125 125 125 125 466 466 448 448 448 448 464 145 145 460 460 460 349 402 96 272 469 469 469 236 94 398 374 374 374 132 132 339 94 199 69 223 130 280 44 44 44 44 32 401 401 354 354 278 278 278 368 342 342 41 41 324 324 301 239 239 384 93 93 93 93 93 207 207 207 246 19 454 229 247 247 126 126 326 326 326 326 101 101 149 228 491 80 80 491 491 74 425 425 386 386 431 486 486 460 460 169 150 342 342 342 224 494 494 416 26 359 359 166 166 166 324 301 236 401 259 161 161 79 499 499 499 265 85 85 146 146 173 173 176 176 135 135 200 248 14 14 411 410 410 410 410 410 410 173 29 29 382 313 216 283 283 455 58 72 72 72 437 481 481 481 481 481 293 175 175 81 84 84 16 88 88 89 89 446 116 64 212 384 180 230 230 230 215 35 35 96 198 198 22 283 455 455 58 183 278 278 278 278 139 139 139 375 375 375 375 98 13 229 491 170 491 15 15 15 193 193 17 +17 17 363 51 228 412 412 83 194 194 446 67 67 131 183 257 257 257 257 453 342 221 221 336 354 354 443 443 443 169 150 342 86 86 6 6 272 472 66 482 482 115 485 374 374 132 252 36 449 462 462 402 402 221 336 144 208 425 386 386 431 496 496 496 496 496 274 274 37 233 185 185 269 323 18 427 427 247 247 126 126 292 23 23 408 408 391 391 228 228 289 491 320 407 407 407 407 310 107 397 397 141 141 141 281 281 9 142 221 221 336 491 74 74 425 425 386 386 431 290 290 290 290 434 434 339 339 195 33 394 76 465 74 190 190 190 487 487 374 374 374 132 132 358 352 352 352 402 198 198 45 45 45 45 131 183 451 30 30 30 301 378 345 141 141 281 453 9 221 336 144 180 84 496 88 88 176 176 135 328 200 335 14 14 145 145 113 113 113 113 206 285 449 34 69 223 130 280 180 145 145 486 460 460 173 280 280 242 242 116 33 250 251 241 81 256 444 213 246 246 246 19 19 454 454 78 170 170 491 28 491 491 312 312 187 292 292 12 12 21 21 21 21 21 21 21 21 21 21 408 408 408 408 149 149 491 491 289 491 209 83 55 322 67 325 30 30 30 30 3 58 58 72 110 110 254 254 254 254 314 35 198 127 283 455 455 236 129 401 401 401 354 354 431 151 151 240 416 416 192 41 41 41 324 3 464 89 89 446 348 466 22 283 455 38 162 232 482 172 115 106 106 153 372 372 372 406 467 302 302 497 497 399 399 217 473 473 264 264 264 264 264 468 468 59 59 59 245 43 364 345 407 407 407 310 107 447 221 336 354 420 420 236 129 36 108 119 119 351 496 496 496 274 143 458 192 242 242 116 116 466 212 45 45 45 325 183 30 30 301 378 141 141 141 281 342 9 221 336 144 180 84 88 88 88 176 135 135 200 200 464 44 44 143 458 27 27 121 121 33 478 478 232 68 172 115 273 278 278 278 285 495 495 495 134 134 134 134 8 100 100 100 497 122 401 401 401 371 278 278 278 31 39 86 86 6 272 11 11 11 11 379 379 471 471 270 433 433 433 18 112 56 56 491 312 312 312 187 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 408 408 408 408 149 228 228 289 491 7 309 479 331 315 315 315 315 450 450 16 293 293 335 197 197 197 197 197 197 197 197 491 491 7 7 364 364 364 364 276 181 181 181 181 181 264 264 264 264 468 468 468 245 245 43 364 364 430 430 430 430 430 342 342 221 196 217 217 217 217 473 486 486 486 486 460 169 169 164 164 164 485 219 485 485 132 143 143 129 82 144 27 27 437 329 329 151 169 164 164 164 164 221 401 259 29 29 382 313 313 35 131 472 221 401 401 401 401 491 144 180 180 84 84 350 88 88 88 176 176 176 176 328 328 200 200 200 117 454 404 483 226 226 226 226 491 83 55 55 55 322 67 67 212 131 133 364 364 276 
181 346 346 181 265 85 85 146 378 378 345 430 430 430 430 342 342 451 30 30 324 422 143 401 401 144 27 180 84 84 496 88 88 88 176 176 135 135 200 248 248 248 216 127 114 114 264 264 264 264 264 59 59 452 452 263 263 417 417 237 491 237 237 421 421 421 491 491 491 128 128 128 491 128 128 193 193 193 17 +17 17 17 296 296 317 317 491 491 317 305 305 461 491 461 491 491 461 491 491 435 435 435 435 435 435 7 373 72 72 430 430 430 430 430 430 430 34 177 177 177 236 35 401 259 354 137 137 137 137 137 94 199 335 14 14 411 411 475 475 475 475 475 475 475 475 324 324 464 464 493 493 493 493 493 216 300 300 382 245 399 217 217 473 136 136 136 136 136 136 282 94 199 340 340 340 94 199 145 145 486 486 460 460 173 280 29 242 242 116 379 33 250 251 241 81 444 444 213 246 246 246 19 19 454 229 247 247 126 126 292 326 326 326 326 326 326 326 326 326 101 101 149 149 228 289 7 217 473 258 258 258 258 342 342 342 494 494 494 368 453 9 142 397 147 380 329 329 329 329 329 329 36 310 107 395 302 302 302 375 497 98 98 98 225 225 225 225 80 80 259 384 371 180 443 443 169 169 352 352 402 6 6 26 359 166 166 166 301 129 259 259 74 189 189 189 285 449 449 176 176 135 328 200 200 248 248 32 32 127 114 114 258 258 258 31 39 86 68 68 68 483 483 440 89 194 446 446 33 212 212 198 127 114 92 92 92 92 167 167 457 457 36 108 377 123 123 416 458 445 180 180 443 493 493 216 300 300 334 59 59 452 263 229 247 247 126 126 326 326 326 326 101 101 149 149 228 228 491 7 70 70 65 65 428 428 428 146 438 325 449 34 202 202 202 202 402 221 259 144 445 278 278 173 173 280 29 242 242 116 94 199 44 44 44 129 129 259 74 190 190 104 104 104 325 325 41 324 324 301 416 239 144 144 484 484 484 236 314 131 221 401 259 445 445 180 443 443 443 443 120 120 271 271 39 342 342 224 253 253 253 253 31 86 238 6 272 123 123 123 8 354 106 496 496 496 274 368 342 142 221 336 208 208 441 151 151 151 169 150 99 238 6 6 310 107 60 298 298 298 275 303 303 471 471 471 269 433 18 112 427 491 491 312 312 312 292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 408 408 391 163 491 316 491 316 316 316 316 491 316 316 316 73 289 289 320 159 159 159 159 35 35 196 196 217 473 473 329 329 329 329 329 329 169 169 164 164 164 485 485 485 485 374 132 422 186 162 232 482 172 115 344 344 344 344 344 274 274 274 42 42 364 147 147 380 288 264 264 264 468 468 468 313 134 359 359 166 166 166 301 301 43 364 276 109 109 189 330 330 33 64 76 131 472 393 155 155 165 165 165 165 53 58 58 72 72 72 72 437 350 350 350 350 350 350 350 182 413 413 381 381 404 404 225 225 225 225 225 80 80 491 491 320 127 45 45 45 45 325 177 177 177 177 457 217 217 217 70 65 65 319 169 150 150 86 86 6 272 472 472 336 354 420 420 420 422 162 232 232 68 68 115 273 231 231 231 53 53 76 465 198 214 214 214 328 200 200 248 76 129 401 491 74 190 190 190 488 488 488 151 151 169 150 342 342 68 224 176 176 176 328 328 200 200 464 89 89 446 67 212 34 106 319 319 319 348 33 33 219 219 219 219 485 374 374 374 374 374 368 368 107 161 134 134 100 100 100 497 43 43 345 407 407 407 36 310 447 397 397 141 141 141 281 86 86 238 6 119 119 295 295 295 295 295 252 143 192 192 135 135 328 200 200 183 183 57 57 57 57 57 203 381 48 48 13 13 78 491 128 491 193 17 +17 17 17 296 296 317 184 184 491 373 451 451 451 30 301 378 364 345 141 141 141 281 342 342 198 22 283 455 38 338 338 338 395 395 106 480 480 480 85 85 146 146 464 459 459 459 31 31 86 238 6 472 196 196 473 136 136 136 136 136 282 388 199 199 255 255 251 251 241 241 431 265 265 265 85 85 85 146 299 173 352 89 89 322 67 199 58 72 72 72 110 171 171 171 171 252 143 
36 449 191 191 236 314 36 108 377 87 58 72 110 110 202 202 202 460 169 352 402 402 6 272 87 87 87 416 144 180 84 496 88 88 88 255 255 399 70 70 65 319 319 319 348 200 248 478 66 482 482 238 6 161 79 288 290 290 290 290 434 339 339 212 310 395 334 334 304 304 304 49 269 168 168 157 157 313 313 36 377 377 123 123 88 88 14 411 475 475 475 475 475 475 475 324 301 129 259 74 425 425 386 386 343 343 343 343 358 318 39 342 9 142 397 345 109 109 498 245 313 183 451 451 30 30 30 301 399 217 70 65 65 428 428 428 146 146 325 131 72 72 110 110 486 486 460 460 402 402 96 272 87 87 87 236 259 108 119 119 351 405 405 405 405 405 405 206 206 169 233 458 192 419 427 491 491 247 312 126 292 292 292 292 292 292 21 21 21 21 21 21 21 408 408 408 149 149 228 82 320 7 217 473 65 486 486 486 460 329 169 164 164 164 164 485 485 485 485 374 374 132 132 132 236 32 401 259 161 161 79 79 380 288 443 151 169 150 150 86 86 86 238 6 272 180 230 230 230 230 215 215 35 29 345 333 333 220 220 44 44 44 43 364 276 346 346 346 428 428 146 146 385 131 472 221 458 144 27 437 437 481 481 481 481 481 175 175 81 300 300 382 406 467 467 89 89 446 116 394 212 161 161 79 499 499 499 428 85 85 146 173 173 280 176 135 135 200 464 340 340 199 44 44 44 8 32 259 354 431 151 151 151 416 416 192 41 41 41 41 41 19 454 229 247 247 126 126 326 326 326 101 101 149 149 228 289 320 320 345 141 141 281 31 342 232 232 68 68 172 115 231 231 231 231 231 53 394 76 164 164 214 214 214 214 200 200 248 212 127 45 45 45 45 236 401 401 259 384 371 278 278 278 314 196 242 242 33 64 212 131 472 72 72 72 110 486 486 460 460 460 215 35 29 29 242 242 94 199 199 106 426 426 426 169 349 352 352 352 242 275 275 303 303 303 48 48 417 417 417 237 237 491 28 28 491 305 305 491 491 362 305 491 491 491 491 362 366 491 366 366 316 491 491 435 316 435 491 491 73 289 7 7 217 473 258 258 258 342 342 224 494 494 494 281 9 142 397 147 147 329 329 329 329 329 252 36 310 107 395 302 302 302 302 497 98 98 13 229 82 247 312 126 126 326 326 101 101 101 149 391 228 491 289 491 74 437 437 284 405 426 426 206 348 64 64 212 300 382 495 406 467 253 253 253 99 99 338 338 400 400 400 400 30 301 399 217 70 65 65 265 428 428 85 146 146 358 385 36 227 427 427 229 247 126 126 126 326 408 408 391 228 228 289 491 144 27 389 389 389 314 196 196 217 473 476 476 476 476 476 143 458 96 196 32 196 309 309 309 309 479 331 231 231 231 231 231 349 164 214 214 214 214 328 200 200 335 14 411 287 284 223 223 223 223 130 280 277 277 277 277 277 385 24 227 419 439 439 439 439 225 128 193 193 17 +17 17 17 296 296 184 184 184 412 83 194 194 194 55 322 67 131 183 156 156 156 156 335 14 145 145 460 460 460 460 349 402 96 272 272 469 469 313 94 398 398 374 374 374 132 339 339 33 471 77 342 168 121 121 121 33 33 394 310 395 395 153 153 387 387 146 146 203 217 291 291 291 291 291 64 243 36 227 472 397 397 345 141 141 281 31 162 232 232 105 105 336 354 153 153 387 387 387 387 139 139 302 302 375 375 122 122 131 227 419 439 417 417 170 491 28 28 28 28 28 491 491 362 362 491 362 491 305 362 362 491 362 491 362 435 491 211 369 491 369 369 369 369 369 21 21 21 21 21 21 21 260 260 260 260 260 260 391 391 391 491 73 491 491 412 287 111 111 111 111 139 139 293 293 122 239 36 384 395 470 459 271 271 150 342 86 238 6 6 491 478 66 68 232 238 6 272 470 470 443 443 215 215 35 354 29 410 410 410 410 410 410 280 29 29 313 236 36 377 123 123 129 259 208 79 79 288 360 360 360 434 434 200 248 248 212 445 180 171 171 171 171 171 252 215 8 354 100 302 497 497 49 342 168 180 145 486 460 460 460 169 402 402 6 272 300 382 313 236 36 
108 108 119 351 213 213 213 213 246 246 246 3 464 89 446 116 394 76 90 393 234 261 25 25 480 480 480 480 299 299 339 64 212 34 180 113 113 113 113 167 167 35 401 393 155 155 165 165 165 165 53 217 217 65 329 329 495 406 467 467 134 139 175 175 423 423 423 423 43 43 364 345 109 109 264 468 468 396 58 183 451 30 30 30 368 342 342 221 336 144 180 106 426 426 426 206 388 94 199 89 446 446 212 131 133 133 276 276 346 346 346 265 265 85 85 85 207 207 19 454 417 417 417 417 417 47 491 47 47 491 435 435 80 491 491 80 80 289 320 127 5 5 455 43 43 364 276 109 498 498 498 396 313 216 216 41 324 324 301 43 364 276 174 174 174 203 53 473 242 116 195 33 90 393 349 234 261 25 106 480 480 480 146 146 299 339 250 359 359 166 166 166 143 458 144 27 121 121 121 76 458 458 208 386 386 444 374 374 374 252 325 34 191 191 191 24 131 404 427 229 247 247 126 126 292 292 292 292 292 326 326 326 326 326 326 326 326 326 326 326 326 326 326 326 101 408 408 408 408 391 491 491 373 451 451 30 30 422 325 371 71 71 71 71 71 453 242 242 348 64 394 76 401 310 107 395 395 395 432 432 330 94 199 495 495 495 134 134 134 359 81 166 166 324 422 416 458 144 180 84 496 496 274 285 449 123 123 236 236 36 108 119 351 351 315 315 315 315 450 450 413 413 413 466 198 114 258 258 258 31 86 86 6 272 119 103 103 103 103 85 299 203 53 29 462 462 462 280 280 219 219 286 286 286 286 334 59 59 452 452 263 225 225 83 55 55 55 322 67 131 183 183 451 451 30 30 434 339 10 10 10 309 479 331 463 463 463 463 29 29 382 382 245 245 349 280 280 278 278 278 368 368 342 168 168 277 277 277 37 385 233 270 270 433 390 160 112 112 439 439 78 56 128 128 193 193 17 +17 17 17 363 51 51 228 491 491 412 118 118 118 118 402 451 30 30 422 314 90 133 147 147 380 499 319 319 319 94 199 145 113 113 113 113 240 285 34 462 130 402 221 36 108 119 119 308 308 308 396 313 94 199 199 230 215 35 478 232 232 232 172 115 444 444 444 213 213 252 24 131 183 451 30 30 30 378 378 345 389 389 389 389 314 196 242 33 33 394 212 161 161 79 380 288 443 443 169 39 342 342 168 230 230 230 230 215 35 29 89 89 116 394 465 108 119 295 295 295 295 295 143 458 96 198 198 283 455 455 8 32 354 354 431 151 151 240 416 416 192 41 41 324 422 36 36 377 87 87 416 458 27 180 84 496 496 274 349 205 155 155 332 332 245 399 70 70 138 138 138 138 138 372 372 372 59 452 263 263 229 491 491 312 15 15 15 193 193 193 +17 17 17 296 296 317 317 317 184 184 184 184 491 219 357 357 357 240 385 35 35 478 68 115 273 231 231 231 53 76 465 214 214 214 214 200 248 248 217 217 217 70 65 65 319 151 169 150 342 86 6 272 34 494 202 402 58 72 110 110 486 486 460 460 215 35 29 242 242 116 379 471 478 478 68 68 115 273 278 278 379 379 77 342 342 26 26 241 431 431 376 376 376 376 169 169 150 342 86 238 196 309 331 331 428 428 428 428 146 252 143 36 377 87 87 38 162 86 482 238 272 180 499 306 306 396 396 285 183 183 183 57 57 57 57 399 70 65 106 426 426 426 426 426 426 206 169 352 352 352 352 352 352 352 112 439 417 237 170 47 491 47 47 491 491 2 47 316 316 316 73 73 73 491 287 111 319 203 203 90 76 465 144 208 208 386 386 360 360 360 360 339 339 394 76 465 74 437 437 151 151 368 368 342 168 302 302 302 375 375 122 122 239 127 114 92 92 92 169 35 77 9 142 397 345 181 181 181 167 385 35 131 419 439 225 225 305 491 412 412 83 55 55 322 67 199 111 111 111 111 438 378 43 364 276 109 109 496 496 274 274 274 457 196 309 479 331 84 84 88 88 44 44 44 44 217 217 217 217 217 217 473 278 278 116 199 199 278 240 143 77 342 86 142 221 336 336 74 213 213 213 252 39 342 224 462 462 462 402 196 196 70 65 480 480 480 85 299 299 339 212 131 
157 157 157 245 129 129 259 27 27 437 370 370 370 370 348 64 76 310 107 395 60 298 379 379 471 471 77 269 433 112 427 247 247 126 126 326 23 23 101 101 149 149 228 289 412 287 55 446 322 67 76 465 449 351 139 139 139 251 175 111 111 111 111 438 438 10 10 10 309 479 331 84 84 16 16 274 43 43 364 181 181 181 181 325 34 356 281 281 342 86 221 336 108 119 295 295 295 295 143 458 192 242 242 116 33 250 217 217 473 486 486 486 460 460 169 164 164 485 485 485 374 132 143 129 259 144 27 27 437 151 151 164 164 402 401 401 354 29 382 382 313 24 335 14 14 209 287 113 113 113 113 113 285 34 255 223 130 280 280 145 486 486 460 173 173 280 242 242 379 250 359 359 81 474 324 324 252 143 36 377 87 87 236 93 93 93 93 207 207 207 454 454 417 417 417 417 237 170 491 28 28 28 491 491 362 362 305 362 491 362 491 362 491 362 369 369 369 369 369 369 369 369 369 369 369 369 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 260 260 391 391 391 391 491 289 289 412 287 255 255 143 458 458 208 441 153 153 387 396 313 325 176 176 328 200 248 250 359 474 474 474 324 19 19 454 454 417 417 417 417 417 237 237 47 491 491 491 435 435 80 289 435 209 209 287 145 486 460 460 460 169 402 35 36 272 382 382 313 143 36 108 119 351 213 213 213 213 246 246 246 246 3 301 196 217 473 258 258 258 31 342 224 494 494 494 31 9 142 397 147 329 329 329 329 143 36 449 395 302 302 497 38 162 68 68 115 273 470 443 240 325 449 180 113 113 113 113 113 450 233 233 227 419 419 439 417 417 417 237 237 237 237 47 491 47 2 491 47 316 316 316 316 491 491 435 373 373 338 400 400 400 400 30 3 58 110 254 254 254 314 196 479 479 307 307 61 61 167 35 393 205 261 25 106 306 306 396 396 313 36 449 87 87 87 416 144 180 180 84 84 84 16 16 16 274 98 98 13 417 417 225 225 193 17 +17 17 17 296 296 51 184 320 320 127 5 455 236 129 259 354 278 278 278 278 278 252 416 416 416 192 472 225 397 225 225 80 80 197 147 147 147 380 499 486 486 486 365 460 203 53 53 53 212 354 302 175 81 176 176 135 328 200 200 248 335 14 14 411 411 153 153 372 372 396 313 143 310 107 395 382 382 313 325 34 121 121 53 53 394 212 212 180 180 486 486 315 450 450 88 372 372 396 313 24 34 58 72 72 72 72 268 268 268 268 268 268 450 450 274 274 271 186 39 323 9 142 397 336 345 109 109 109 264 313 216 216 22 5 455 236 458 27 27 351 151 151 169 169 164 164 472 221 336 354 29 498 498 313 313 143 77 270 342 342 26 26 251 241 431 431 278 278 120 120 173 173 352 352 272 419 229 247 247 126 126 292 292 326 326 326 326 101 101 101 149 149 228 289 320 345 141 141 141 281 453 168 44 44 38 38 232 232 232 105 105 445 445 470 365 365 365 365 460 330 388 64 131 472 472 221 458 208 208 441 441 441 153 153 153 387 387 285 285 300 382 382 467 69 69 130 130 280 44 44 44 399 70 65 65 265 265 265 85 85 85 85 139 293 175 175 175 230 230 230 215 402 198 198 22 283 455 42 42 147 380 380 496 496 496 496 274 24 131 393 205 155 165 165 165 165 53 250 251 251 241 431 329 278 330 388 379 33 471 49 9 142 58 72 437 481 481 481 481 481 293 175 175 81 84 84 84 496 274 98 13 13 417 170 170 491 170 491 28 491 28 491 28 362 362 362 362 362 362 362 491 491 491 40 305 491 305 305 305 316 435 491 435 435 435 491 491 435 435 7 465 108 377 87 87 8 420 420 420 422 186 338 338 395 395 487 498 498 498 498 59 59 59 263 229 229 247 126 126 126 326 326 326 326 326 326 101 101 101 149 149 228 289 320 127 5 5 455 251 251 251 241 235 235 235 235 235 235 235 348 200 248 248 248 251 251 241 431 290 290 290 290 290 434 434 434 339 195 33 250 217 473 476 476 476 476 252 325 325 191 191 191 325 34 44 44 416 129 259 144 484 484 484 236 314 401 
401 259 371 485 485 139 139 302 497 497 349 234 234 261 25 498 498 498 498 498 493 216 216 300 300 334 334 59 452 452 263 263 417 417 237 237 237 237 47 47 47 491 47 491 491 73 491 73 289 7 217 473 65 486 486 329 460 169 169 164 164 219 485 485 374 422 143 458 144 27 351 329 329 329 169 169 164 164 472 221 336 354 495 498 498 313 385 35 77 342 86 142 393 393 261 25 91 91 91 91 91 206 493 216 216 300 334 334 59 59 452 263 229 247 247 126 126 326 326 326 326 408 408 149 149 491 412 83 83 253 253 253 253 253 99 338 338 338 338 338 338 395 180 499 499 265 265 265 265 85 85 85 146 146 464 89 89 446 446 394 478 66 68 68 115 273 265 265 265 85 146 146 146 175 175 81 11 11 11 64 76 465 34 253 253 253 253 342 168 257 257 257 257 31 162 68 68 68 115 273 273 319 319 319 388 94 199 145 145 145 460 460 460 460 402 402 96 272 300 382 382 58 58 57 57 57 57 57 57 203 381 117 404 229 247 15 193 193 17 +17 17 17 363 51 51 491 412 412 254 254 254 254 131 221 458 144 180 106 405 405 206 240 285 34 253 253 31 31 86 142 142 393 234 261 25 106 106 481 481 306 306 306 306 372 406 467 467 255 255 255 43 43 345 109 109 403 403 403 207 464 253 253 253 342 342 30 30 30 422 129 74 437 437 437 405 405 206 169 150 342 342 224 494 494 236 129 259 26 359 359 166 166 166 422 129 259 144 27 389 389 389 120 37 24 24 131 472 393 155 165 165 165 165 53 473 58 183 257 257 257 31 342 86 142 393 261 25 470 443 139 175 81 84 84 84 274 399 217 217 473 136 136 136 136 282 388 303 195 404 133 364 345 333 333 220 216 114 180 113 113 113 113 113 450 167 35 335 14 411 145 145 145 460 460 460 178 178 96 96 99 436 436 395 134 134 134 134 134 134 359 359 166 166 166 324 301 42 147 456 456 456 236 36 161 161 487 487 487 213 213 252 325 325 176 176 135 135 200 200 464 340 340 116 64 76 108 377 123 123 216 22 283 455 43 43 364 276 109 109 496 496 496 37 37 37 24 471 270 270 323 323 18 97 397 336 82 409 409 409 409 67 58 183 30 30 30 422 349 205 261 25 180 315 315 450 450 413 64 131 34 277 236 325 34 257 257 257 31 342 142 72 72 350 350 350 350 203 53 53 394 478 66 86 238 6 272 470 470 120 120 120 37 24 131 419 427 491 491 247 312 312 292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 326 21 408 408 408 408 391 491 491 289 491 144 208 79 288 288 360 360 360 434 339 33 90 212 445 445 180 171 171 171 171 252 215 8 29 302 302 497 497 497 185 49 9 397 397 345 141 141 281 281 9 142 221 336 336 354 278 278 139 139 139 293 293 122 122 449 449 415 415 415 129 198 198 22 455 38 349 234 234 261 25 498 498 498 498 396 240 216 114 300 459 271 31 162 86 68 6 272 470 470 120 120 120 240 314 314 259 108 377 123 223 130 402 257 257 257 257 453 9 221 336 336 208 208 386 386 485 286 286 286 468 313 313 24 314 26 26 251 241 431 294 294 294 294 294 282 282 388 303 303 212 131 419 78 491 491 247 312 126 292 292 292 292 326 326 408 149 149 228 491 491 83 83 55 55 322 67 466 212 212 127 114 264 264 468 468 406 467 177 177 177 131 133 133 364 345 141 141 141 141 141 368 368 31 342 86 86 6 108 377 123 123 216 216 258 258 258 258 31 342 86 86 6 6 371 93 93 93 93 207 207 19 19 454 417 417 417 417 417 193 193 193 +17 17 17 296 296 184 184 320 320 320 354 264 264 264 468 468 468 313 359 359 166 166 166 301 378 280 280 278 278 278 368 453 342 168 494 134 8 8 100 100 100 100 497 497 349 155 155 165 165 165 165 466 466 22 283 455 217 217 473 290 290 290 290 290 434 434 339 33 250 42 147 147 380 288 496 496 496 496 274 274 24 325 34 255 255 255 175 241 235 235 235 235 235 235 235 413 200 248 250 345 407 407 407 407 407 310 107 447 447 483 226 226 209 287 297 297 297 297 
297 293 293 122 216 22 448 448 448 464 464 493 493 493 493 216 300 300 382 382 313 335 14 226 226 209 411 145 145 486 460 460 173 280 29 242 116 359 466 81 166 324 324 3 58 72 72 268 268 268 268 268 268 450 274 368 368 9 168 300 50 50 50 49 9 142 397 336 347 347 347 313 186 162 54 172 344 344 344 344 344 186 186 162 482 482 482 115 273 344 496 496 186 99 436 436 395 134 134 134 8 359 359 166 166 166 422 162 68 68 115 470 278 278 143 310 107 395 485 469 134 88 158 158 158 158 158 24 325 191 191 191 37 24 419 439 78 170 170 28 491 28 28 491 491 28 491 362 305 362 362 362 40 40 40 40 201 491 366 491 491 305 366 366 491 316 316 316 491 316 73 73 289 7 217 473 258 258 258 342 342 224 494 494 494 31 9 142 397 147 329 329 329 329 329 36 107 107 395 302 302 497 497 251 251 241 81 81 278 278 330 388 379 195 64 212 131 419 439 225 225 225 225 80 491 75 371 371 278 278 314 196 309 479 307 307 307 61 61 167 131 90 221 458 144 27 437 437 437 437 437 481 481 481 481 481 481 293 293 293 497 497 335 197 197 197 197 197 197 7 251 251 251 251 251 241 241 278 278 173 173 280 176 176 176 135 328 200 200 200 340 340 340 340 394 478 342 224 273 344 344 344 449 449 44 44 44 129 259 74 425 425 386 343 343 343 343 343 358 358 358 39 39 433 160 97 97 197 226 226 80 491 491 491 7 7 251 251 251 251 241 431 278 278 278 173 173 176 176 176 176 328 328 200 200 200 200 335 335 209 209 415 415 415 415 415 131 106 106 106 297 297 297 297 182 375 375 375 98 98 13 13 229 170 170 312 312 312 312 292 292 292 292 292 292 21 21 21 21 21 21 21 21 408 408 408 408 408 391 391 491 491 412 177 177 177 177 177 356 356 142 238 238 6 310 395 395 151 169 150 342 342 86 238 6 491 478 66 66 68 232 68 238 6 272 470 470 403 403 403 171 171 171 246 246 176 176 328 328 200 200 248 248 216 114 92 92 92 92 35 77 77 9 397 221 276 181 181 181 181 181 240 385 35 227 419 419 439 225 225 225 225 225 225 225 225 225 225 373 373 373 338 338 400 400 400 400 422 422 162 68 68 68 273 470 120 120 120 120 37 37 24 34 253 253 253 99 338 338 400 400 400 30 422 162 232 232 232 238 6 6 371 470 189 151 215 215 35 96 96 272 272 255 255 251 251 241 431 235 235 235 235 235 235 248 248 212 22 283 455 236 239 239 384 371 213 213 213 252 215 129 259 402 133 42 42 42 147 380 499 151 151 240 240 449 449 191 191 191 37 314 314 90 90 401 401 491 208 208 79 380 380 486 486 376 460 460 169 150 342 342 342 224 41 324 324 301 251 251 251 241 431 431 290 290 290 290 434 434 434 339 339 117 404 229 247 247 126 326 193 193 17 +17 17 296 296 296 184 184 320 354 153 153 153 153 387 387 387 396 285 131 300 382 382 313 131 133 133 345 333 220 220 220 133 133 364 276 276 346 346 346 265 85 85 85 85 139 293 293 122 122 472 133 147 147 380 288 496 496 496 274 368 368 9 142 221 336 354 109 278 278 99 99 436 107 50 50 50 50 50 185 269 433 433 160 112 427 491 491 312 312 126 292 292 292 292 292 292 326 326 21 21 326 326 21 21 21 408 408 408 408 149 228 289 177 177 177 356 356 342 86 238 196 479 331 231 231 231 274 274 43 364 276 174 174 319 319 348 348 64 212 300 334 382 382 245 399 217 473 65 486 486 460 169 169 164 164 485 485 485 8 345 88 109 242 242 116 250 250 70 65 65 498 245 42 147 380 134 134 139 175 175 423 423 423 423 353 353 353 406 467 245 245 8 32 401 401 401 354 354 106 496 496 496 274 169 164 164 224 494 255 251 251 81 278 278 26 26 302 302 497 335 14 14 411 284 284 284 405 405 405 206 206 206 206 37 24 131 404 404 439 225 225 225 225 417 80 491 80 7 7 251 241 241 278 278 278 173 280 176 135 135 135 200 199 255 255 43 43 364 276 109 403 403 403 403 171 246 324 301 8 259 354 
376 376 376 376 376 178 178 458 458 192 183 183 286 286 286 286 286 286 468 245 245 8 354 62 62 62 62 438 216 114 57 203 53 394 478 342 232 172 115 279 279 279 279 279 279 279 375 375 233 270 270 269 433 390 112 112 56 491 56 47 491 305 491 187 187 187 187 391 391 316 491 73 73 73 491 289 108 119 487 487 487 288 213 213 246 246 318 368 453 168 106 353 353 353 353 396 35 465 472 196 70 70 383 383 383 383 36 107 447 221 336 144 27 437 319 319 53 53 76 465 242 242 242 94 41 41 41 41 19 19 454 229 247 126 126 126 326 326 326 326 326 326 326 326 326 101 101 101 149 149 228 491 320 127 114 84 84 496 274 236 239 384 371 485 286 286 286 468 382 313 10 10 479 331 84 84 84 496 16 274 368 368 453 342 168 118 118 118 118 402 198 198 114 0 0 0 0 301 378 43 364 347 347 347 347 498 467 396 313 216 216 114 0 222 382 313 314 314 239 354 420 420 420 464 464 44 116 94 479 230 230 169 169 352 352 69 223 223 130 402 402 198 114 57 57 57 203 381 381 117 404 439 439 439 439 439 439 237 78 78 170 491 491 312 312 312 12 12 1 292 21 21 21 408 408 408 391 391 391 228 491 412 412 287 111 111 438 438 314 133 133 380 499 499 493 216 216 300 382 134 251 241 367 367 367 458 192 415 415 457 401 401 259 74 351 213 213 213 213 252 215 259 259 29 100 100 375 375 98 98 13 417 417 417 237 237 237 237 237 237 237 237 237 237 237 237 237 47 316 491 491 316 316 491 491 491 435 289 289 289 108 377 87 8 420 420 420 420 422 99 338 338 395 395 487 498 498 498 59 59 59 452 263 263 417 417 193 193 193 +17 17 296 296 296 184 184 184 320 435 0 0 0 0 422 162 68 115 444 444 444 360 434 339 394 90 76 144 445 121 121 116 33 76 465 108 432 432 432 432 379 64 76 449 191 191 191 325 34 34 196 309 479 479 331 230 230 230 169 169 352 352 221 401 259 159 159 159 236 259 127 361 361 361 94 199 111 111 111 438 438 162 342 224 494 494 494 129 74 84 84 496 496 496 496 274 274 368 368 9 9 198 114 0 222 222 468 313 313 219 219 219 485 485 374 374 186 186 323 86 238 6 377 123 123 88 277 277 277 277 385 131 227 419 439 78 229 170 491 312 312 292 292 292 292 292 292 326 326 326 326 326 326 23 23 101 101 408 391 391 491 491 412 287 44 8 8 354 354 106 284 91 206 206 240 325 41 324 324 143 144 389 389 389 389 200 248 192 445 213 324 219 219 219 219 485 374 374 368 162 54 86 6 272 123 123 88 109 109 475 475 94 475 475 475 422 349 164 214 214 214 214 214 328 200 200 117 404 454 225 225 225 225 225 225 451 451 213 357 357 357 173 280 29 242 116 33 465 377 123 8 354 420 420 420 360 135 135 200 200 248 58 58 72 72 110 110 486 486 365 365 365 328 200 200 195 195 248 248 212 239 34 253 253 253 253 453 342 342 198 22 448 448 448 464 145 284 306 306 468 406 467 288 469 469 99 436 447 221 196 473 275 275 379 394 478 342 68 115 470 470 120 120 120 37 24 117 263 417 417 417 417 237 237 237 491 491 28 28 491 28 491 491 305 362 491 491 362 362 362 491 362 362 491 40 211 211 491 218 491 369 369 369 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 260 260 260 260 391 391 228 228 491 320 320 333 333 333 220 314 35 401 259 127 114 258 258 258 258 258 31 39 342 433 160 97 221 336 196 473 258 258 258 258 342 224 494 494 494 31 9 142 397 397 147 329 329 329 329 329 143 36 310 395 302 302 302 497 497 162 232 232 232 238 6 6 371 470 189 443 151 215 35 35 96 401 272 34 180 113 113 113 113 113 285 449 223 223 130 198 198 22 283 455 251 251 251 241 431 290 290 290 290 290 434 434 434 339 339 117 117 404 225 225 225 225 225 80 226 491 209 188 340 340 340 33 33 64 212 377 123 123 216 22 283 455 8 354 180 376 376 376 376 460 178 178 35 458 192 219 485 180 306 306 306 306 306 306 396 24 285 
69 462 462 402 402 401 259 208 79 288 288 213 213 360 434 434 339 248 394 239 445 180 171 171 171 171 252 215 8 354 100 100 375 375 375 185 185 269 433 433 18 18 112 439 439 237 237 237 237 47 47 491 491 2 2 316 316 491 316 316 491 73 289 289 7 7 4 104 104 104 104 104 104 468 468 337 337 337 324 301 416 32 32 208 79 79 79 288 288 360 360 360 246 246 246 434 434 339 94 199 89 194 194 194 446 446 64 212 212 465 196 196 309 398 398 213 213 246 246 252 143 36 108 119 351 89 446 446 446 394 76 401 74 190 492 492 38 162 342 68 115 273 265 265 265 428 85 146 146 358 358 39 342 142 397 345 141 141 141 281 453 342 198 198 114 92 92 92 240 314 131 219 219 180 180 106 306 306 306 306 306 59 59 37 131 419 427 229 247 247 15 193 193 193 17 +17 17 17 363 363 51 51 51 184 373 66 68 115 273 470 443 443 240 449 449 34 255 8 354 180 113 113 113 113 113 113 113 450 413 413 36 449 34 106 125 125 125 125 203 250 250 250 70 174 174 174 319 319 348 33 394 478 478 68 68 115 273 273 265 265 265 85 85 85 146 24 133 133 345 333 333 220 220 142 221 401 491 208 79 484 484 484 484 484 252 252 457 36 472 472 401 401 491 74 74 351 351 171 171 252 143 36 161 161 487 485 485 485 464 106 499 306 306 396 396 178 178 458 144 302 100 497 497 497 497 364 364 364 276 109 278 139 175 175 81 81 84 84 84 496 274 185 185 185 269 433 18 427 229 247 247 126 126 292 292 326 326 326 326 326 326 326 326 326 408 101 149 149 149 149 228 412 83 83 194 194 194 322 67 466 212 22 448 448 448 464 464 493 493 493 493 493 216 300 300 382 382 245 378 43 345 333 333 220 220 142 221 401 401 491 74 190 190 487 288 278 278 203 53 53 195 195 195 250 251 251 251 241 431 284 426 426 426 203 53 212 212 29 29 495 313 313 325 41 41 41 41 19 318 185 185 433 433 433 160 112 112 417 417 237 237 237 237 237 237 237 491 2 491 201 201 491 316 435 491 491 435 435 435 435 289 289 491 7 479 331 307 307 61 61 285 44 44 44 44 38 482 482 482 482 238 6 336 161 79 487 288 288 403 403 171 171 246 246 252 422 186 162 232 232 232 68 238 6 272 470 278 278 278 178 458 458 192 472 196 196 309 331 157 157 157 157 372 396 186 162 54 482 482 238 238 6 371 180 84 84 84 496 274 413 413 413 195 250 250 345 141 141 141 281 9 238 6 272 87 87 354 420 420 420 420 422 162 68 68 115 267 267 267 267 267 434 434 434 434 339 303 303 404 13 491 247 247 126 126 292 326 326 326 23 101 101 149 228 491 491 373 155 155 155 332 332 332 372 245 399 70 258 258 258 31 342 224 494 494 368 453 142 142 397 147 329 329 329 329 329 329 36 310 107 395 302 302 302 302 497 497 43 43 364 364 345 389 389 389 314 58 58 110 202 202 202 202 402 478 66 68 68 267 267 267 267 267 434 339 94 277 277 277 37 37 325 34 118 118 118 118 402 402 472 177 198 127 114 0 222 222 468 245 58 72 110 110 254 254 254 240 314 131 133 401 401 354 137 137 137 137 275 303 303 303 48 404 439 439 439 78 237 47 47 47 491 491 47 316 316 491 73 289 73 491 491 190 488 488 488 488 488 428 146 146 173 173 29 469 469 236 36 26 359 359 359 474 474 474 324 186 99 338 400 400 400 30 378 8 345 141 141 281 453 342 168 168 223 223 130 129 198 198 448 448 448 464 464 255 143 129 259 74 351 351 278 278 360 339 398 398 398 464 275 275 116 64 212 198 45 45 45 236 129 196 196 473 65 329 329 406 406 288 288 139 175 175 175 423 423 423 143 458 144 27 437 151 151 169 169 402 35 221 336 354 29 498 382 313 313 35 131 472 482 482 482 482 397 397 189 189 189 189 169 35 96 96 272 472 472 198 127 114 92 92 92 92 240 385 449 449 219 464 180 106 306 306 306 306 206 396 285 285 34 84 410 410 410 410 173 173 29 29 495 495 406 406 467 467 253 253 453 9 168 14 14 411 284 
405 405 206 169 349 352 352 29 242 242 116 94 199 253 253 253 253 338 338 400 400 400 30 422 162 232 482 397 397 189 189 189 215 215 35 96 272 472 156 156 156 156 245 58 72 72 268 268 268 268 268 268 450 450 271 186 39 390 390 390 18 427 56 247 312 15 15 15 292 292 292 292 15 193 193 193 193 193 17 +17 17 17 296 363 363 363 363 363 363 363 101 51 51 51 228 228 491 320 345 174 174 174 174 174 348 348 64 248 465 144 389 389 389 389 34 202 202 173 280 280 444 213 213 213 252 314 196 242 242 116 479 199 44 44 44 217 217 217 473 398 213 213 213 286 139 139 302 302 375 375 335 226 14 287 287 284 405 405 206 169 352 402 198 198 22 283 455 416 144 208 79 380 315 315 315 315 450 450 413 413 212 212 131 133 345 333 333 220 220 216 180 113 113 113 113 167 35 131 14 14 411 410 410 410 410 173 29 29 382 245 8 129 259 190 380 288 288 278 203 53 176 176 135 328 200 200 248 248 212 22 283 455 129 259 74 190 492 492 245 173 280 280 498 498 498 498 498 396 215 8 354 337 485 485 464 139 302 497 497 497 129 401 401 259 74 351 351 443 443 178 178 458 192 192 462 462 462 402 32 239 384 371 498 498 498 498 498 59 396 385 24 227 419 419 439 417 417 237 237 237 237 237 237 491 28 237 28 362 491 491 362 491 491 362 362 362 491 362 362 491 435 211 491 369 369 369 21 21 21 21 21 21 21 21 21 21 408 408 408 149 149 228 289 491 7 217 258 258 258 258 342 342 224 494 494 494 281 9 142 397 336 147 329 329 329 329 329 329 310 107 395 302 302 302 497 497 497 42 42 147 380 486 486 486 460 460 215 35 96 36 472 66 68 68 172 482 105 196 70 70 65 306 306 396 396 313 35 26 26 359 359 474 474 324 464 415 415 415 35 22 283 455 416 458 458 445 278 278 278 36 310 107 107 60 298 116 33 33 394 212 384 371 153 153 153 153 372 372 372 59 59 452 263 225 225 225 83 83 55 55 322 67 67 478 478 232 232 232 68 238 6 371 470 443 443 443 215 35 96 96 272 34 340 340 116 33 250 250 250 409 409 409 409 116 33 394 32 239 354 278 278 278 314 196 242 242 242 33 394 76 36 377 87 87 236 239 371 371 374 374 374 132 186 162 54 172 224 273 84 84 84 84 16 98 98 13 417 417 417 417 237 237 237 201 211 211 187 260 260 391 391 391 491 316 316 73 491 289 491 127 5 5 455 143 458 445 445 278 278 278 278 143 310 107 107 395 298 298 116 94 199 415 415 415 415 131 221 401 144 79 79 288 288 360 360 434 434 200 248 212 239 445 180 171 171 171 171 171 252 8 354 100 100 302 497 497 81 253 253 453 142 221 345 141 141 281 453 342 168 44 236 236 36 310 107 395 485 286 286 286 313 349 349 155 262 262 100 175 81 255 255 129 259 74 437 437 306 306 306 396 396 457 233 196 291 291 291 291 243 243 227 427 427 247 247 15 126 15 292 292 292 193 193 193 193 17 +17 17 17 296 363 363 363 51 51 51 51 184 491 491 412 287 157 157 157 157 372 372 372 396 396 245 245 43 364 364 345 389 389 389 240 325 34 202 202 202 402 221 259 354 137 137 137 137 137 33 394 394 76 310 107 107 395 395 286 286 286 468 396 245 349 349 155 262 262 262 100 100 375 98 98 117 417 417 225 80 80 491 209 118 118 118 118 118 402 177 177 177 177 36 34 254 254 254 314 196 479 331 307 307 307 61 61 167 457 35 259 137 137 137 137 137 33 394 478 478 68 172 344 344 344 344 274 129 129 74 74 72 72 290 290 290 290 290 339 339 33 90 393 155 262 262 262 359 359 166 166 166 324 422 143 129 458 144 208 208 208 386 386 386 444 360 360 360 360 246 434 434 339 94 199 253 253 253 253 31 342 86 238 6 272 87 87 87 416 458 445 485 278 278 173 173 29 277 277 385 314 478 478 68 115 273 231 231 231 53 53 76 198 214 214 214 328 200 200 200 69 223 223 130 198 198 22 448 448 464 464 255 255 129 129 74 74 485 485 485 485 286 286 468 468 467 
337 11 11 11 11 11 379 379 77 77 342 342 224 69 69 223 130 44 44 44 116 94 199 319 319 319 348 33 33 219 219 219 219 219 485 485 374 374 374 132 318 368 368 54 54 238 272 472 221 336 74 74 437 437 306 306 306 306 306 396 396 134 175 175 81 334 334 334 334 59 452 452 13 491 247 312 312 126 292 292 292 292 292 292 326 326 326 21 21 21 21 21 21 21 408 408 408 408 149 228 491 412 177 177 177 177 77 9 142 397 336 276 109 109 278 330 348 33 64 212 384 84 84 84 84 496 274 185 49 9 26 26 241 367 367 367 367 143 96 36 131 483 226 226 209 411 213 213 213 213 213 252 318 39 39 342 86 86 238 6 272 483 440 89 89 446 446 446 67 212 131 133 364 276 109 109 443 443 120 120 271 150 39 433 433 160 6 6 227 419 439 439 439 225 225 225 225 237 47 47 491 80 491 373 373 373 155 155 487 487 487 374 374 216 216 283 455 455 43 276 109 109 443 443 443 150 150 86 86 238 272 397 397 364 174 174 174 174 319 348 195 195 195 404 404 229 491 247 312 126 292 292 23 23 23 101 101 149 391 228 491 289 320 7 241 367 367 367 367 192 192 176 135 135 135 200 464 464 113 113 113 113 113 167 449 34 125 125 125 125 466 22 455 455 8 259 180 376 376 376 376 376 460 178 178 458 192 219 219 219 180 180 106 306 306 306 306 306 306 59 37 24 131 404 229 229 247 126 126 326 326 326 408 408 149 228 228 289 491 144 445 210 210 210 210 210 203 53 44 44 44 44 349 234 234 234 261 425 386 386 431 151 151 240 240 285 34 69 462 462 130 402 402 196 217 217 217 217 473 65 443 443 139 175 175 241 81 84 84 84 496 274 274 274 236 32 401 36 310 107 395 485 374 374 374 374 132 132 132 242 116 33 33 33 394 478 478 232 172 172 115 273 319 319 319 348 348 466 250 241 431 431 428 428 428 428 146 358 385 233 227 419 419 439 439 78 170 170 491 47 491 2 491 2 316 316 316 491 316 491 73 73 73 491 354 159 159 159 35 35 198 22 5 448 448 14 411 411 213 213 213 213 213 186 39 342 86 86 238 6 272 472 397 336 276 174 174 174 174 275 388 303 303 117 48 417 78 170 491 491 421 491 128 128 193 193 17 +17 17 17 296 296 317 435 491 184 184 184 184 320 345 109 409 330 330 67 77 77 54 54 219 152 152 152 152 143 129 144 144 180 106 405 405 405 206 206 167 167 35 227 227 419 225 225 226 226 209 44 44 44 416 458 458 208 208 425 386 386 431 278 278 278 53 53 76 76 270 342 342 172 224 69 462 130 130 198 198 22 283 455 455 129 259 354 425 425 241 431 374 374 374 374 374 132 132 413 203 381 381 381 212 212 32 197 197 197 197 491 197 197 7 7 364 276 346 346 346 428 428 428 146 146 358 35 35 401 131 472 472 401 80 80 491 310 107 107 395 351 264 264 468 468 468 337 337 337 324 422 24 36 161 161 487 487 288 213 213 246 318 318 49 342 168 340 340 116 466 22 283 455 455 251 241 431 431 443 443 169 169 352 402 402 272 483 14 411 153 153 153 372 372 396 313 36 310 107 395 334 334 59 313 24 24 404 427 247 247 126 126 292 326 326 326 326 326 326 101 149 149 149 491 412 83 55 55 55 322 67 10 10 309 479 331 331 284 405 405 206 206 240 325 34 176 176 135 135 328 200 200 200 248 248 478 162 68 68 68 26 26 26 241 431 432 432 432 330 64 64 212 131 300 382 382 382 245 245 129 129 354 354 498 498 498 498 396 186 35 36 310 107 107 50 50 50 50 50 49 342 342 68 221 336 384 371 180 315 315 315 450 450 348 94 199 340 340 466 22 283 455 455 58 72 72 437 437 481 481 481 481 175 175 81 84 84 84 274 274 8 259 354 62 62 62 62 216 216 22 283 455 8 129 259 190 380 380 499 496 496 496 274 233 233 458 419 427 229 247 126 126 292 292 292 292 292 23 23 23 23 101 101 101 149 149 228 228 320 345 141 141 141 281 453 142 221 221 336 208 79 79 79 288 360 360 360 434 434 339 64 212 131 180 180 410 410 410 410 173 29 29 382 
245 245 8 8 62 62 62 62 62 146 464 44 44 44 236 36 108 119 119 351 351 486 365 365 365 200 200 212 212 302 302 302 175 175 462 462 462 462 4 4 4 280 106 284 480 480 480 480 480 85 85 299 299 299 299 339 303 471 471 471 49 433 433 433 160 112 112 439 56 56 237 237 237 491 28 491 28 491 362 362 362 491 362 362 491 362 362 491 491 362 491 211 491 491 369 369 21 21 21 326 408 408 408 408 228 491 289 491 373 451 451 286 286 286 286 468 468 313 186 162 232 68 115 273 470 486 486 460 460 240 35 472 196 196 70 65 65 495 406 467 288 139 175 175 423 423 423 143 129 144 27 437 437 151 151 169 169 164 164 164 221 336 354 29 334 334 59 59 385 233 465 419 439 439 225 417 417 80 80 80 7 345 409 409 409 409 76 310 107 400 400 30 30 422 422 342 342 273 470 486 486 460 240 285 34 415 415 415 325 131 106 106 297 297 297 375 375 98 98 263 417 417 417 417 237 237 237 237 80 491 491 483 287 287 297 297 297 297 297 43 43 345 109 109 109 171 171 318 186 162 232 68 68 26 26 425 241 431 428 428 428 146 143 26 26 359 166 166 166 301 143 36 490 490 490 38 162 482 482 238 6 161 487 499 151 151 150 150 86 142 142 393 262 100 100 175 81 462 462 130 162 68 68 273 319 319 319 348 33 64 212 310 107 395 180 480 480 480 85 299 299 303 48 229 229 247 15 15 193 193 193 17 +17 17 17 296 296 363 363 52 52 52 52 52 52 51 51 51 51 184 184 289 491 209 83 83 194 194 194 194 322 67 67 131 183 183 183 451 286 286 286 286 468 468 313 186 99 338 400 400 400 400 30 422 186 162 232 68 68 115 470 486 486 486 460 167 167 35 401 196 309 479 331 315 315 315 315 315 450 450 98 98 417 417 417 417 417 417 237 237 47 47 491 47 491 80 80 73 80 80 7 7 309 309 479 278 278 278 278 36 449 449 176 176 328 328 328 200 303 117 98 13 417 417 417 417 237 237 47 47 47 491 80 80 80 80 435 209 83 83 194 194 322 67 67 212 22 5 455 236 36 108 119 351 351 171 171 171 252 8 29 302 302 497 497 8 354 255 255 58 58 72 72 480 480 480 480 299 299 339 212 131 156 156 156 156 245 245 43 345 141 141 281 342 26 26 241 431 476 476 476 252 252 36 393 393 155 332 332 332 186 162 342 115 273 151 151 151 215 215 29 29 334 334 59 59 59 263 263 417 417 417 237 237 237 491 28 28 28 362 362 491 362 362 491 362 362 362 491 362 362 362 491 362 362 362 218 218 218 218 218 218 491 491 211 211 369 491 369 369 369 369 369 369 369 369 260 260 163 163 163 316 316 316 491 316 491 73 289 289 7 217 473 258 258 258 342 342 224 494 494 368 453 9 142 397 147 380 329 329 329 329 252 143 310 107 395 302 302 302 375 375 375 98 98 13 225 225 225 225 80 80 80 80 80 320 354 255 255 255 349 349 155 155 148 148 148 372 372 313 186 338 400 400 400 30 58 110 254 254 254 314 90 393 205 25 470 264 264 468 468 134 359 359 166 166 166 422 129 458 208 208 386 386 496 496 496 496 368 368 453 9 142 198 22 22 283 455 384 371 106 153 153 153 372 372 372 59 59 452 263 229 247 126 126 326 193 193 +17 17 17 296 296 317 491 491 184 184 184 184 184 320 320 127 114 0 0 0 378 378 43 347 347 347 347 313 186 164 164 164 119 487 487 487 288 213 324 324 301 143 259 74 425 425 386 386 431 403 171 171 252 24 270 270 342 26 26 26 241 241 431 403 403 171 171 207 358 37 24 131 427 229 247 247 126 126 292 292 326 326 326 326 326 101 101 101 149 149 228 491 373 66 115 273 273 496 496 274 216 216 45 45 45 236 35 196 196 217 65 65 329 495 406 467 256 256 139 175 175 423 423 423 423 423 399 217 70 65 65 151 169 150 342 142 221 336 420 420 420 464 154 154 154 458 458 96 86 105 105 336 470 470 151 178 178 96 96 36 272 176 135 328 200 200 248 248 478 342 68 68 115 273 231 231 231 53 250 250 174 174 174 319 348 33 58 72 72 72 437 350 350 
350 350 203 203 250 250 250 333 333 333 220 220 402 196 196 217 217 65 486 486 460 460 169 164 25 485 485 485 374 422 143 36 377 87 87 236 36 108 108 119 213 213 213 213 246 246 246 19 19 454 454 229 170 491 312 312 312 187 292 292 292 12 12 12 21 408 408 408 260 391 491 491 316 491 316 491 73 289 7 7 7 354 159 159 159 159 240 314 35 35 127 5 5 455 236 129 239 384 371 278 278 278 99 436 436 395 395 50 50 50 50 49 9 142 397 336 347 347 347 467 14 411 204 204 204 204 204 29 337 337 337 301 422 239 384 93 93 93 93 171 171 252 252 314 239 384 371 371 278 278 99 99 436 436 395 50 50 50 50 50 185 185 269 433 390 390 160 112 97 225 225 225 225 225 225 80 491 491 412 83 55 55 55 322 466 127 0 0 222 378 43 345 141 141 281 453 168 168 350 350 350 350 348 250 466 81 166 166 324 422 143 401 259 208 208 208 190 487 499 499 486 460 460 215 280 106 486 486 460 215 215 74 100 302 497 497 122 129 259 190 492 492 492 368 9 9 168 300 498 498 498 498 498 498 59 59 59 304 173 173 49 269 342 168 89 89 446 446 116 195 195 195 212 133 133 364 276 174 174 174 174 348 348 33 33 90 90 76 144 27 437 437 480 480 480 85 299 299 299 212 131 462 462 462 402 221 259 445 445 351 351 171 171 171 171 358 358 358 233 458 192 427 229 247 126 126 326 326 326 326 326 326 326 101 101 149 391 491 373 66 68 172 115 273 494 240 216 216 45 45 45 45 314 35 198 22 448 448 448 464 154 154 154 458 96 86 105 105 336 470 151 151 178 178 96 96 36 272 191 191 131 472 458 144 27 27 437 319 319 319 53 53 76 35 259 242 242 242 116 41 41 41 324 422 143 465 458 389 389 389 389 314 196 479 331 307 307 61 61 215 35 35 354 420 420 420 3 14 411 475 475 475 475 475 475 475 301 143 129 74 492 492 236 236 129 75 108 278 278 278 178 458 458 192 485 134 134 359 81 300 382 245 143 82 144 27 351 319 319 203 53 394 76 465 29 242 242 242 41 41 41 19 19 454 13 417 491 170 421 421 491 491 491 421 421 128 491 128 128 491 128 128 193 193 17 +17 17 17 296 296 296 296 296 52 52 52 52 52 52 408 408 51 51 228 184 184 289 219 357 357 357 357 240 385 35 401 401 401 197 197 80 80 491 320 276 181 181 181 181 240 285 34 223 223 130 402 196 196 217 473 65 329 329 460 460 169 164 164 485 485 485 485 257 368 368 9 142 397 221 364 364 364 276 346 346 428 428 428 146 252 35 131 472 221 458 144 144 27 437 437 437 437 437 437 481 481 481 481 481 481 175 175 81 300 300 382 382 406 467 467 89 446 116 466 22 283 455 38 162 482 482 482 482 115 273 106 153 372 372 406 467 467 302 302 497 497 399 399 217 217 217 217 473 65 264 264 264 264 264 264 468 468 59 59 59 452 452 263 263 13 78 170 170 170 491 491 2 491 2 187 491 187 187 163 163 163 391 391 391 316 73 73 491 491 7 217 473 258 258 31 342 342 494 494 494 368 9 9 142 397 147 329 329 329 329 329 329 36 310 107 395 302 302 302 497 497 43 345 141 141 141 281 342 221 336 445 180 443 240 325 449 176 135 135 200 248 248 90 393 234 234 234 261 261 25 264 264 264 264 468 313 134 359 359 166 166 166 324 422 32 401 401 401 401 401 239 384 371 278 278 278 368 453 342 168 41 41 41 324 246 301 43 364 333 333 333 220 216 198 114 258 258 31 342 342 224 273 319 319 319 348 10 10 219 219 219 398 485 374 374 374 374 374 368 368 107 395 397 134 100 100 100 497 497 399 217 217 217 217 473 258 258 258 31 342 86 238 6 272 495 495 495 41 41 324 464 255 8 354 180 113 113 113 167 167 35 472 472 221 259 208 208 441 441 441 346 346 265 265 85 85 146 146 146 277 277 277 24 131 449 472 225 226 226 226 226 491 209 287 319 319 348 33 33 250 217 217 473 65 258 258 31 162 232 68 68 68 238 6 272 485 485 468 468 337 337 324 324 464 459 459 459 368 31 9 142 221 336 
208 79 79 288 213 213 213 246 339 339 33 248 248 212 465 445 180 171 171 171 171 252 252 8 354 100 100 302 375 375 375 185 49 433 390 18 427 56 56 247 312 312 292 292 292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 260 260 260 163 163 163 491 491 163 491 163 491 491 316 491 316 491 316 491 316 73 73 491 320 465 144 180 484 240 325 34 213 213 213 173 402 196 94 176 360 328 200 248 147 380 329 329 329 329 107 107 302 302 302 302 497 98 98 98 225 225 225 225 225 225 225 225 225 225 225 225 225 225 225 225 320 7 217 70 329 329 495 406 467 139 139 175 175 423 423 423 423 162 68 68 115 273 470 120 120 240 314 131 472 401 401 259 354 190 380 288 288 278 31 342 86 105 105 26 359 359 474 474 474 474 19 19 454 454 454 225 225 225 225 225 225 225 225 225 225 225 7 7 127 114 258 258 31 342 342 168 356 281 453 453 168 44 44 44 42 147 147 380 288 286 286 464 139 139 497 497 349 349 234 261 25 25 480 480 480 480 85 85 85 299 299 339 339 94 199 360 360 213 360 173 173 402 196 94 199 176 328 328 200 200 464 356 356 430 453 430 430 116 199 277 277 277 277 24 131 419 439 439 439 225 225 225 225 391 391 80 80 491 491 320 345 346 350 496 348 35 310 152 152 152 422 186 342 224 470 278 278 457 36 384 371 180 315 315 315 315 450 450 413 413 303 48 404 13 229 491 247 15 15 193 193 17 +17 17 17 296 363 363 51 51 51 184 491 373 72 268 268 268 268 268 88 88 88 353 353 406 467 106 297 297 297 297 293 293 219 219 464 106 106 387 387 372 349 349 205 261 25 106 496 496 496 233 233 233 233 270 270 433 390 390 18 112 112 439 439 439 439 439 78 78 491 491 28 28 28 2 2 2 491 2 2 341 341 491 341 341 163 163 163 163 163 163 163 316 491 316 491 491 491 73 491 373 66 68 172 115 273 231 231 231 231 53 53 394 76 164 214 214 214 214 214 328 200 200 248 248 248 212 127 45 45 92 92 167 167 35 472 393 393 155 155 332 332 332 387 134 251 251 241 431 431 376 376 376 460 460 169 178 35 401 458 144 192 69 69 223 223 223 223 223 130 130 352 352 402 483 440 411 475 475 475 94 475 475 475 324 324 464 180 493 493 493 493 216 300 382 382 313 10 10 479 331 290 290 290 290 434 434 434 434 203 399 217 70 70 65 65 428 428 428 146 146 24 131 472 221 259 420 420 420 420 420 143 458 144 144 27 437 481 481 481 481 481 293 122 122 131 472 393 393 234 234 234 261 487 288 288 432 330 348 64 76 310 107 107 395 395 459 459 459 459 215 233 131 427 229 247 247 126 326 326 326 326 326 101 101 149 149 228 228 412 188 154 154 154 416 416 96 368 453 453 278 278 278 278 278 31 342 86 86 238 6 272 191 191 191 191 37 325 335 335 440 145 89 194 446 446 67 212 131 34 106 297 297 297 293 43 345 109 109 109 171 171 368 453 342 58 72 72 110 110 486 486 460 240 240 325 34 154 154 154 416 96 96 453 168 278 278 278 31 39 86 238 6 272 191 191 191 314 133 259 354 255 236 236 108 119 397 487 360 360 360 434 339 33 250 217 65 65 329 495 406 288 288 139 175 175 423 423 423 423 129 458 144 27 437 151 151 169 169 402 402 221 401 401 354 495 498 59 396 313 325 449 89 89 446 116 250 217 473 258 258 31 342 224 494 494 494 368 142 142 397 147 380 329 329 329 329 329 36 310 107 395 302 302 302 375 98 98 229 82 247 126 126 126 326 326 326 326 326 326 101 408 149 149 228 289 412 188 188 340 121 394 478 478 68 86 105 336 354 106 265 265 428 428 146 146 240 325 449 34 223 223 223 223 223 223 173 173 402 352 352 352 352 352 352 97 97 417 197 417 237 237 237 237 237 80 491 80 80 80 209 287 157 157 157 372 245 245 129 129 74 492 492 492 492 58 110 486 486 486 460 215 215 35 96 66 68 68 342 221 336 336 354 420 420 422 143 458 144 27 27 351 151 151 
151 368 453 342 168 168 223 223 223 223 223 223 173 173 352 352 352 352 97 97 225 225 225 225 80 491 127 114 222 222 468 313 313 325 34 490 490 31 342 342 342 273 494 203 70 65 134 134 175 175 431 486 486 486 486 468 468 468 469 469 469 36 449 449 41 41 41 19 19 454 417 170 170 170 491 28 28 28 28 491 28 362 362 491 362 362 362 362 491 362 491 491 491 362 211 211 211 369 491 369 369 369 369 369 21 21 21 21 21 21 21 21 21 408 408 408 149 149 228 289 491 320 473 65 329 329 245 406 406 380 134 139 139 175 423 423 423 423 345 141 141 281 281 9 168 44 44 44 129 129 108 108 119 119 437 437 437 481 481 481 481 481 481 481 182 182 182 182 293 293 497 497 497 98 98 404 225 225 193 193 +17 17 296 296 4 261 25 470 278 278 278 330 116 33 195 195 212 90 133 364 276 276 174 174 174 203 53 53 473 275 275 388 195 117 404 133 364 345 333 333 220 220 402 335 14 411 145 145 145 365 365 365 360 200 64 212 212 302 302 302 302 497 497 497 497 49 269 342 342 168 89 89 194 446 446 64 133 133 345 333 333 220 216 114 180 113 113 113 113 450 167 285 131 472 221 458 144 208 27 27 498 498 498 498 498 59 59 59 59 59 173 173 270 270 270 390 390 390 112 112 417 417 417 237 47 491 491 491 80 80 491 491 373 156 156 156 156 156 313 236 239 259 384 180 106 106 306 306 306 396 396 178 143 458 192 445 72 110 351 264 264 264 468 468 468 382 313 186 99 338 338 338 338 395 395 84 84 496 496 274 122 314 314 478 478 68 115 273 231 231 231 53 53 90 90 32 32 144 208 79 79 380 288 288 171 171 171 252 186 39 342 232 482 482 238 6 161 79 487 288 213 213 213 358 358 233 270 270 270 342 168 89 55 322 322 250 345 141 141 281 453 9 168 106 297 297 297 297 293 43 345 109 109 109 368 31 342 86 238 6 108 119 397 487 278 278 31 31 86 86 272 191 191 191 236 325 34 230 230 230 230 215 35 259 340 340 340 94 44 44 44 44 72 72 72 437 306 306 306 306 306 396 396 313 26 251 241 431 278 240 285 285 302 302 497 122 10 10 479 331 331 405 405 206 206 167 449 472 221 336 354 255 255 58 72 72 72 437 480 480 480 480 480 85 299 299 299 299 339 195 243 212 131 419 225 397 133 320 345 333 333 220 220 164 164 472 221 401 491 108 108 119 374 374 374 374 374 374 132 132 132 132 132 43 364 276 276 346 346 265 265 85 85 146 468 468 382 313 337 58 183 72 72 110 110 264 264 468 468 245 215 35 259 74 351 275 275 116 379 195 471 471 478 66 68 68 238 6 272 189 189 189 189 178 458 192 44 44 236 416 239 208 79 79 380 151 151 151 150 39 342 342 224 494 494 134 8 402 359 359 474 474 166 422 349 164 164 164 487 487 487 487 374 374 88 88 277 277 277 277 385 75 227 419 427 491 491 247 312 312 292 292 292 292 292 292 292 21 21 1 21 21 21 21 21 21 21 408 408 408 408 149 491 491 373 338 400 400 400 400 30 301 422 251 251 367 367 367 367 367 367 35 96 259 26 26 241 266 266 266 266 266 266 35 192 44 44 44 43 364 276 174 174 174 174 53 53 65 242 242 94 199 462 462 462 402 196 309 309 479 331 486 315 315 315 460 450 406 467 84 84 84 88 88 88 154 154 154 96 96 232 232 105 105 336 354 485 286 286 286 468 468 337 337 11 11 11 11 379 379 471 77 342 224 89 89 116 33 90 42 147 147 380 288 278 236 36 449 191 191 191 131 472 458 144 27 437 370 370 370 370 370 348 64 64 310 310 436 395 459 459 11 11 379 303 471 243 270 269 433 160 160 18 112 439 225 225 225 225 80 80 491 491 320 345 407 407 407 236 233 36 310 107 338 338 400 400 400 400 301 378 43 345 109 346 346 141 355 355 355 37 185 185 433 433 433 160 160 112 427 56 491 491 312 15 15 15 15 193 193 193 193 193 17 17 +17 17 17 296 296 363 51 51 51 51 51 491 184 184 184 184 320 159 159 159 159 240 35 127 0 0 222 378 345 347 141 141 281 342 
342 44 44 44 38 162 232 68 172 115 470 470 171 171 171 252 173 173 176 176 135 135 200 200 248 248 478 232 232 232 68 115 273 231 231 231 231 53 76 76 164 164 214 214 214 328 200 464 255 255 8 259 180 113 113 113 113 450 285 285 58 156 156 156 156 156 245 245 399 217 217 473 65 315 315 315 315 450 450 450 450 169 352 164 164 97 397 397 364 407 407 407 407 407 407 36 310 310 107 107 107 447 97 483 197 226 226 80 80 209 209 188 118 118 118 118 352 352 25 177 177 254 325 34 254 254 254 314 129 259 137 137 137 137 137 116 195 195 195 335 14 226 411 145 463 463 463 463 29 29 382 313 186 186 162 68 172 344 344 344 344 344 274 186 162 232 68 26 26 26 386 431 428 428 146 146 35 26 359 166 166 166 236 36 384 490 490 490 173 280 280 180 443 443 139 175 175 81 81 469 469 469 215 233 96 96 227 419 427 229 247 126 126 126 292 326 326 326 326 326 101 101 101 149 149 228 491 320 7 70 70 65 65 428 428 85 146 24 325 202 202 202 402 129 259 137 137 137 137 33 76 465 465 27 121 121 33 394 478 68 68 115 115 278 278 278 285 300 382 313 24 325 34 121 121 116 33 394 212 239 371 371 278 278 143 458 458 192 469 469 325 34 459 459 459 173 173 280 69 69 223 130 280 44 44 44 38 342 68 115 273 432 432 330 379 64 77 77 224 494 462 462 402 183 183 219 485 485 485 374 374 132 399 53 334 334 334 59 452 263 417 417 417 417 237 237 28 28 491 28 362 362 491 491 491 362 491 362 491 362 491 491 40 211 211 369 369 369 369 21 21 21 21 21 21 260 260 260 260 391 391 391 491 73 289 289 7 345 109 109 109 347 467 467 499 297 297 297 297 293 293 35 259 74 190 104 104 104 285 34 41 324 301 43 276 109 109 139 139 139 293 293 293 98 98 13 13 417 417 417 417 417 417 80 80 80 80 435 66 66 179 179 179 179 179 314 314 196 217 473 258 258 31 342 224 494 494 281 142 142 397 147 380 329 329 329 329 143 310 107 395 395 302 302 375 98 98 13 417 417 417 170 249 20 28 305 491 442 305 305 2 491 491 2 2 249 491 491 305 305 366 491 491 366 316 491 435 435 435 435 435 435 289 412 287 287 111 111 111 438 378 345 141 141 281 9 142 336 144 27 480 480 480 146 299 64 34 34 462 130 29 44 255 38 349 205 205 261 190 380 288 288 171 171 252 252 314 239 219 219 219 152 152 374 374 132 132 43 364 276 109 372 372 59 59 396 339 243 227 472 472 198 216 127 114 84 84 84 16 16 16 274 98 98 13 225 225 225 345 409 409 409 409 94 199 111 111 111 438 438 162 342 172 115 273 106 481 481 426 426 206 399 217 217 473 486 486 486 460 460 169 164 164 485 485 485 485 374 318 186 162 54 238 6 272 499 499 499 206 240 285 34 176 135 200 200 106 284 405 206 169 169 402 96 36 272 469 469 236 325 93 93 93 207 207 19 454 454 229 82 247 126 126 126 326 326 326 326 326 326 326 326 326 101 101 101 149 149 228 412 412 287 111 111 111 438 349 164 164 106 106 405 405 206 167 457 217 473 473 476 476 171 252 252 8 420 420 324 324 183 451 30 30 30 378 141 141 141 281 342 142 221 144 180 84 496 88 88 176 176 135 135 248 248 108 377 123 123 216 22 283 455 236 384 180 91 91 91 91 91 178 458 96 6 272 334 334 334 304 304 304 185 185 323 390 390 18 112 439 417 237 237 237 237 237 237 237 237 237 128 128 128 128 128 128 193 193 17 +17 17 17 17 296 317 317 317 491 317 461 461 461 461 184 184 184 184 491 7 7 70 65 329 329 42 406 406 288 134 139 139 175 175 423 423 423 423 31 342 26 26 251 241 431 278 278 278 215 233 233 270 270 433 433 86 238 336 336 82 108 397 397 441 441 109 278 278 385 233 36 310 447 447 238 6 272 34 319 319 348 64 212 300 300 313 186 162 232 238 6 470 470 294 294 294 294 330 94 94 176 176 328 200 200 248 359 359 474 474 474 19 19 454 454 229 170 491 247 312 126 292 292 292 292 292 
21 21 21 408 408 408 408 408 391 391 228 491 373 373 400 400 400 30 30 58 58 110 254 254 254 254 325 34 154 154 458 96 66 342 105 105 336 470 151 151 178 178 35 96 401 75 272 191 191 191 314 196 217 473 258 258 258 31 342 224 494 494 368 9 142 397 147 380 329 329 329 329 252 36 107 107 302 302 302 302 175 175 431 230 230 230 230 215 35 227 419 439 439 439 225 225 47 47 491 80 491 373 373 338 338 400 400 400 30 30 58 58 110 254 254 254 254 254 35 196 196 309 479 331 84 84 84 496 274 274 413 413 466 466 45 45 45 45 216 198 22 283 455 38 162 68 68 115 273 273 265 428 428 146 146 146 35 449 34 69 69 130 402 402 196 217 473 486 486 486 460 460 169 169 164 164 485 485 485 374 132 301 236 129 310 310 107 395 180 106 426 426 426 206 348 76 465 449 449 176 135 200 200 199 106 426 405 426 206 402 402 478 232 68 68 115 344 344 344 344 88 14 14 411 319 319 319 94 199 154 154 458 445 445 351 351 351 315 315 450 413 413 76 449 449 134 134 134 8 26 359 359 474 474 474 324 19 454 229 247 247 126 126 326 326 326 326 326 326 326 101 101 101 149 149 228 320 7 345 389 389 389 389 314 401 259 420 420 420 420 422 236 129 36 108 119 344 344 344 374 374 132 132 399 70 383 383 383 383 383 383 36 107 447 393 393 155 332 332 332 245 58 156 156 156 313 10 10 479 331 290 171 171 252 173 8 29 334 304 304 304 186 54 142 221 336 445 445 485 485 485 468 468 468 337 337 485 464 180 180 405 405 169 169 150 342 342 224 469 469 325 325 41 41 41 19 19 454 13 439 78 78 491 491 28 491 312 341 491 341 12 12 341 491 12 12 260 260 260 260 260 391 391 228 491 491 289 289 209 287 287 16 16 16 274 413 348 479 331 84 84 84 16 16 16 16 203 381 48 417 417 417 417 417 237 491 47 47 47 491 491 47 491 80 80 491 209 287 287 111 111 111 111 203 203 90 90 76 458 208 441 441 346 428 428 428 146 146 131 133 133 364 109 109 139 139 139 139 375 375 98 98 417 417 417 225 225 225 287 287 297 297 297 297 293 293 216 216 114 84 84 84 16 88 88 111 111 111 111 438 58 58 110 110 110 254 254 240 34 44 44 44 8 354 180 180 376 376 376 376 460 282 240 24 131 183 72 110 110 443 443 240 325 34 207 207 207 324 416 416 239 219 357 357 443 443 169 150 150 86 238 6 469 469 469 469 325 93 93 93 207 207 19 454 417 417 417 417 80 491 435 373 338 400 400 400 30 30 422 162 342 115 273 470 120 120 120 37 24 404 229 229 247 312 15 15 15 193 193 193 193 17 +17 17 17 363 363 363 363 51 51 228 491 289 7 217 70 65 329 329 329 329 169 164 164 164 485 485 485 301 378 378 43 345 109 189 432 330 348 64 76 36 377 377 123 123 8 259 190 380 499 499 428 428 146 146 252 35 131 133 147 147 380 288 288 173 173 280 29 334 59 59 452 263 417 417 417 417 417 237 237 237 435 237 237 80 491 435 435 435 7 7 364 345 152 152 468 468 313 416 416 445 180 443 443 240 325 34 135 135 135 200 200 44 44 44 251 241 81 278 278 26 34 302 497 122 8 32 32 32 354 153 153 153 153 387 387 387 207 207 146 301 393 155 165 165 165 165 53 44 44 94 335 14 411 411 153 387 372 372 396 349 349 352 25 242 242 94 199 255 271 38 342 342 115 273 106 265 265 85 85 146 175 175 81 242 203 217 473 89 340 116 195 195 195 195 195 197 309 309 479 331 84 84 496 173 280 29 469 38 162 54 482 105 336 144 180 496 496 496 274 186 99 436 436 395 423 423 423 423 355 263 263 229 82 247 126 126 326 326 326 326 101 101 149 149 228 412 412 83 194 194 194 322 212 131 451 30 356 281 342 342 221 144 27 351 319 319 319 53 176 135 135 200 248 248 248 479 331 106 426 426 125 348 466 466 22 283 455 236 259 161 161 161 487 288 290 290 290 434 434 339 248 76 108 377 351 494 116 94 479 331 428 428 428 428 299 358 358 24 227 419 439 439 417 417 237 
237 28 28 491 28 491 362 491 491 362 362 491 362 491 362 491 362 491 491 362 218 218 40 305 366 491 366 366 366 366 366 491 435 435 316 73 73 289 289 209 188 118 118 118 118 402 221 196 217 473 65 329 329 245 42 147 380 288 134 139 175 175 423 423 423 423 58 110 254 254 254 254 36 478 232 232 68 172 115 273 470 120 120 120 120 24 314 314 472 198 127 45 45 45 45 457 196 217 217 473 65 329 486 486 460 460 169 164 164 485 485 485 485 485 374 132 58 254 254 254 254 314 416 458 144 180 106 426 426 426 413 348 64 76 465 108 377 123 44 236 129 259 190 190 380 499 499 428 85 146 146 143 131 472 133 133 147 147 380 288 288 173 173 280 29 334 59 59 59 313 143 36 377 377 87 87 217 473 65 213 213 213 252 325 34 44 44 44 129 259 445 445 445 351 351 351 486 365 365 360 200 200 200 248 212 192 180 255 495 42 147 380 380 374 374 132 132 132 349 155 155 165 165 165 165 70 65 284 405 206 169 150 162 482 482 482 482 482 238 6 161 487 487 288 288 288 139 139 139 81 337 324 423 423 423 423 423 423 452 263 229 247 312 126 126 292 326 326 326 326 326 326 326 408 408 149 149 228 491 320 7 217 258 258 258 31 54 224 494 494 368 453 142 142 397 147 380 329 329 329 329 252 36 107 107 395 302 302 302 375 375 98 143 401 259 144 389 389 389 389 314 196 309 479 307 307 307 61 61 285 449 202 202 202 130 129 259 354 137 137 137 116 33 250 217 217 70 70 138 138 138 138 138 372 467 467 255 255 38 54 86 238 6 272 106 499 405 426 206 348 199 199 459 469 271 99 447 447 6 6 6 419 439 78 491 305 421 491 421 128 491 193 193 193 17 +17 17 17 17 296 317 317 317 435 435 184 184 184 373 373 338 400 400 400 30 378 345 141 141 281 453 168 145 145 145 460 460 460 178 96 96 436 447 134 134 134 134 134 359 166 166 166 166 324 186 162 482 482 482 238 6 336 161 487 278 278 178 458 192 192 242 116 116 195 195 394 394 212 401 401 401 401 384 371 180 180 319 319 319 203 53 381 381 381 381 381 381 76 393 155 332 332 332 332 245 349 205 205 261 25 106 265 265 265 265 85 85 85 146 146 173 402 402 66 68 68 115 273 470 151 151 178 458 458 192 242 275 275 379 303 471 471 471 49 433 160 112 427 247 247 312 126 292 292 292 292 292 292 292 292 292 21 21 21 326 21 21 408 408 408 408 149 149 228 228 316 316 73 491 289 289 209 177 177 177 177 131 133 133 141 141 141 141 281 453 342 483 483 226 226 209 287 319 319 319 319 348 348 394 478 478 66 68 68 115 494 494 494 215 129 259 74 74 437 72 72 437 437 496 496 496 496 274 274 368 368 368 9 168 494 134 134 8 100 100 100 100 100 375 375 497 216 198 45 45 45 45 35 196 196 70 65 329 329 329 406 406 467 288 139 175 175 423 423 423 345 141 141 281 342 142 196 217 473 476 476 476 143 458 192 176 135 135 328 200 248 248 393 234 234 234 261 25 319 319 319 348 94 199 223 223 130 402 58 156 156 156 156 59 59 59 452 263 229 247 247 126 126 126 292 326 326 326 326 1 1 1 1 408 408 260 260 391 391 391 391 491 73 73 73 289 491 320 159 159 159 159 159 385 35 196 196 217 473 258 258 258 342 342 224 494 494 494 368 9 142 397 147 147 329 329 329 329 329 329 143 310 107 107 395 302 302 302 497 497 43 345 141 141 141 281 453 168 483 14 226 209 297 297 297 297 297 399 70 65 65 496 169 150 54 238 6 6 472 393 234 261 261 148 148 148 387 372 396 186 186 54 86 238 6 6 472 472 472 482 224 224 494 494 38 162 323 323 224 494 494 129 259 74 437 496 496 496 496 274 274 368 368 9 168 277 277 277 37 24 131 227 419 439 439 439 439 417 237 237 237 28 28 491 491 28 362 491 362 362 362 491 491 491 491 362 362 491 218 362 491 491 218 491 218 218 218 218 218 435 218 366 491 491 305 366 491 366 435 366 491 366 491 366 316 316 491 316 491 316 316 
491 73 73 289 289 209 287 430 430 430 430 430 430 219 219 477 477 378 88 109 44 116 116 199 335 14 226 226 226 209 209 411 498 498 498 308 396 313 94 459 459 459 459 271 31 342 86 86 6 272 472 221 196 70 473 329 329 329 406 467 134 134 134 175 175 423 423 423 423 423 263 263 225 225 225 225 225 80 373 373 338 338 400 400 400 30 422 239 384 490 490 399 217 473 365 365 365 365 365 388 64 212 191 191 191 314 133 259 409 409 409 409 33 33 250 32 280 280 153 153 343 387 387 146 358 39 39 86 142 142 397 456 456 456 236 36 108 119 308 308 308 308 308 308 308 388 339 33 394 212 108 123 123 123 123 58 156 156 156 156 59 59 452 263 229 229 247 312 312 126 292 292 292 292 292 1 1 1 1 23 23 23 408 408 408 391 391 316 73 491 289 289 7 357 357 357 271 31 342 168 494 255 402 402 458 208 441 441 153 153 153 387 372 396 396 271 186 39 39 390 390 390 390 390 390 18 18 112 439 439 439 439 237 78 421 128 193 193 17 +17 17 17 296 296 296 184 184 373 66 172 179 179 179 179 314 196 196 70 65 329 329 495 406 467 288 134 139 139 175 423 423 423 423 263 229 82 247 126 126 326 326 326 326 101 101 101 149 149 228 412 83 253 253 253 453 342 224 118 118 118 118 402 402 221 259 144 445 180 443 240 449 449 176 135 135 200 200 248 248 32 32 354 354 153 153 153 153 387 387 387 85 207 318 185 269 9 142 393 155 165 165 165 165 70 14 14 411 153 387 372 372 349 349 205 352 29 242 116 94 199 255 38 31 342 68 115 273 265 265 85 85 85 175 175 81 203 203 471 471 49 453 168 89 340 116 116 10 10 479 331 84 84 496 274 8 29 459 313 31 162 54 105 105 336 27 496 496 496 496 274 99 99 436 395 423 423 423 43 43 345 347 347 245 245 129 259 74 437 437 306 306 306 206 240 285 449 69 223 130 198 198 283 455 219 219 219 219 219 485 374 374 374 132 132 99 99 161 161 397 134 100 100 100 497 497 186 162 482 142 105 336 336 336 190 380 288 288 360 328 200 200 195 195 248 248 364 364 276 276 109 498 498 498 396 396 178 35 458 192 125 125 125 125 348 199 335 14 411 411 475 475 475 94 475 475 324 324 301 378 43 364 276 109 109 443 443 139 139 139 293 293 497 497 42 42 147 147 380 288 443 443 416 416 458 445 485 134 134 175 175 158 158 158 158 325 449 191 191 191 325 335 14 145 145 486 460 460 173 280 280 242 242 116 379 250 359 81 41 324 324 324 422 349 234 234 261 261 25 106 306 306 306 306 306 306 282 203 203 117 404 229 247 247 126 126 326 326 326 326 326 326 326 326 326 101 408 408 149 228 491 491 412 188 340 340 67 77 478 232 86 68 272 470 470 443 443 240 34 223 223 130 129 259 354 420 420 420 360 135 135 135 200 44 44 44 44 199 335 145 319 319 319 348 348 33 90 72 72 72 72 498 498 498 498 498 396 396 285 285 180 106 284 353 206 206 173 280 280 121 121 116 199 469 469 173 280 418 418 418 418 418 418 99 99 436 436 60 60 298 298 303 303 303 117 404 13 229 491 247 312 126 292 292 292 292 292 292 12 12 12 21 260 305 201 201 201 201 201 201 201 201 201 491 491 316 316 491 316 491 289 289 7 7 473 258 258 258 342 224 494 494 494 281 9 142 397 147 329 329 329 329 329 143 36 107 107 395 302 302 497 497 349 349 234 261 25 180 189 139 139 139 293 167 35 35 198 45 45 45 45 310 338 400 400 400 400 30 30 3 58 110 110 254 254 254 254 254 314 131 133 364 147 456 456 456 38 162 68 68 172 115 444 444 444 444 444 246 246 318 173 402 6 272 34 44 44 44 8 8 401 401 401 197 491 80 491 80 491 80 80 197 66 66 68 172 115 273 494 278 173 8 4 280 485 485 286 286 286 286 468 382 245 245 399 399 217 217 217 217 473 65 432 432 330 348 64 64 465 449 449 302 302 302 497 497 122 129 401 401 401 401 491 310 107 395 395 106 481 424 424 182 182 375 375 122 233 75 227 227 419 419 
439 439 439 439 439 237 439 78 78 47 491 47 491 491 316 316 491 491 316 316 73 373 373 373 338 400 400 400 30 422 422 164 164 25 106 106 405 405 405 206 167 449 449 34 340 340 116 94 199 145 154 178 458 96 342 342 224 105 27 386 386 386 386 399 473 418 418 418 418 99 436 436 60 60 298 116 33 250 53 394 76 259 74 441 441 153 387 387 299 299 299 358 243 270 270 433 160 112 427 491 247 247 126 15 15 193 193 193 193 17 +17 17 17 17 296 296 52 52 52 51 51 51 51 51 184 184 491 320 7 217 70 473 65 65 329 42 42 147 147 147 380 288 134 139 175 175 423 423 423 423 423 335 440 89 89 446 446 212 131 472 196 196 473 65 486 486 460 460 169 169 164 164 485 485 485 132 143 129 401 144 144 27 27 437 151 151 169 164 164 164 401 401 259 29 382 313 285 34 69 223 130 280 106 297 297 297 297 297 293 215 35 35 259 74 74 213 213 213 213 213 252 215 259 29 100 302 175 175 81 255 255 236 384 180 405 405 405 206 215 96 449 135 135 135 200 44 44 44 8 32 32 401 401 401 401 401 354 354 153 153 153 153 153 153 153 387 387 387 207 207 207 207 19 19 454 454 454 229 229 247 312 312 312 126 292 292 292 292 292 12 12 12 12 12 12 260 260 260 260 391 391 391 228 491 373 373 155 165 165 165 165 53 44 44 199 106 106 284 387 372 372 396 349 349 234 261 29 242 116 94 199 255 38 162 232 232 172 115 273 265 265 265 85 146 146 146 175 175 81 459 203 203 203 381 48 404 13 439 78 170 170 170 28 491 187 187 341 2 2 2 491 2 2 362 362 362 362 40 491 366 366 491 366 435 366 366 491 366 491 316 316 316 491 491 435 435 491 289 7 7 364 276 109 109 139 139 139 139 293 293 375 98 98 98 225 225 225 225 225 225 225 465 198 127 5 5 455 43 43 364 276 276 109 109 498 498 498 498 134 134 139 302 293 497 122 122 131 133 345 141 141 141 281 162 232 232 232 232 68 68 115 273 498 498 498 313 240 35 26 26 359 359 166 166 166 422 143 36 108 108 119 308 308 308 308 308 313 94 176 135 200 200 200 230 230 230 215 35 478 232 68 68 273 273 265 428 146 146 416 416 401 401 259 371 180 315 315 315 315 315 315 450 450 450 413 413 303 117 48 404 229 229 491 312 312 126 126 292 292 292 292 292 292 292 292 292 1 21 21 21 21 408 408 408 408 408 391 391 391 228 491 373 373 338 400 400 400 400 378 378 345 389 389 389 314 8 354 420 420 420 422 342 342 224 494 494 129 129 74 190 487 499 499 265 85 85 146 368 453 238 6 272 34 494 236 314 196 196 309 309 309 479 331 231 231 231 231 349 164 164 214 214 214 328 200 200 464 145 460 460 460 169 402 96 272 300 382 313 216 216 114 258 258 258 271 271 39 433 433 433 390 160 18 112 427 56 56 170 312 312 312 187 187 292 12 12 12 12 12 12 12 12 12 408 163 163 163 491 316 491 316 491 491 289 289 289 7 7 309 309 309 479 331 231 231 231 231 169 164 164 164 214 214 214 214 328 328 200 303 117 404 404 439 439 439 439 439 237 237 237 421 421 421 421 491 421 128 491 128 128 128 128 193 193 17 +17 17 17 296 211 317 317 317 317 52 52 52 52 52 52 52 52 51 51 51 51 184 184 184 491 320 320 181 181 181 285 449 34 125 125 125 348 348 457 14 226 226 226 226 209 411 498 498 498 498 498 169 169 164 164 472 221 259 354 181 181 236 35 478 54 224 344 344 344 36 449 44 44 44 10 10 10 479 331 84 496 496 274 99 99 436 60 60 298 116 94 199 340 340 116 76 377 123 123 123 219 477 222 222 222 372 372 245 58 72 72 110 110 120 120 120 120 37 24 24 131 404 439 225 225 225 225 80 80 80 373 373 338 400 400 400 400 422 143 384 490 490 490 399 217 473 365 365 365 365 365 365 330 388 212 384 191 191 191 314 401 401 75 384 490 490 31 342 342 224 494 494 129 259 74 190 487 487 374 374 374 173 173 176 176 135 200 200 248 359 359 474 474 474 474 19 454 229 229 491 247 312 
312 126 292 292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 260 408 408 408 408 391 391 491 316 73 73 73 73 491 127 127 258 258 258 258 31 342 342 342 97 72 72 110 254 254 254 254 314 35 259 137 137 137 137 137 33 33 394 32 465 384 180 180 319 319 319 348 195 195 250 250 364 333 333 220 220 216 114 180 113 113 113 113 167 167 131 449 183 156 156 156 156 406 467 255 255 236 314 90 4 280 280 265 265 265 85 146 146 186 39 342 142 221 336 354 420 420 360 360 135 135 200 200 464 145 376 376 376 376 460 169 150 150 342 86 105 6 96 96 227 419 419 439 78 47 47 47 47 491 491 80 80 289 289 209 83 194 194 194 194 194 194 282 388 195 195 131 472 472 196 217 70 65 65 319 169 150 150 86 238 6 472 221 336 74 190 492 492 492 492 245 349 205 205 261 25 148 148 148 372 372 396 396 271 186 39 86 86 142 221 336 354 420 420 420 422 143 36 371 490 490 31 9 142 221 336 336 74 190 487 487 288 374 374 374 132 132 132 173 173 96 6 227 419 439 439 78 170 305 170 28 28 491 28 491 28 28 28 491 362 362 362 362 362 362 491 362 362 362 491 362 362 305 305 218 218 491 218 218 218 491 218 218 218 218 435 435 211 211 369 21 21 21 21 21 21 21 21 23 23 260 260 260 260 391 391 391 491 73 73 289 289 320 320 109 109 84 84 139 139 139 139 139 16 16 293 293 293 43 364 345 152 152 152 152 422 402 221 221 354 137 137 137 137 33 394 76 465 164 214 214 214 360 360 76 458 192 176 176 135 200 200 464 255 255 8 354 180 113 113 113 113 206 240 285 34 277 277 457 173 155 155 332 332 38 162 232 68 115 273 231 231 231 231 53 53 90 76 108 119 103 103 103 103 103 103 103 85 299 299 203 381 381 117 404 263 439 439 417 417 237 237 47 491 80 80 80 80 435 435 209 287 297 297 297 297 293 293 293 43 364 109 109 278 278 348 64 465 449 300 382 495 467 467 242 116 33 394 393 205 261 25 470 376 376 376 376 460 178 178 233 96 96 227 419 419 439 225 80 80 80 80 80 320 456 456 456 456 236 108 119 308 308 308 308 308 179 313 64 212 472 196 217 473 65 329 495 406 467 134 134 139 175 175 423 423 423 423 263 13 229 229 491 312 15 15 15 193 193 193 193 17 +17 17 17 363 363 363 363 363 408 51 149 228 491 491 320 7 473 258 258 258 31 342 224 494 494 368 453 168 180 145 329 329 175 175 81 81 469 416 416 96 453 168 470 365 365 365 330 348 212 300 300 382 313 186 162 232 232 105 105 336 470 432 432 330 330 64 64 77 449 224 300 156 382 245 43 43 345 141 141 281 453 342 168 230 230 230 215 35 74 183 485 286 286 382 245 245 43 364 276 174 174 174 348 348 64 64 212 93 93 93 93 93 301 8 8 255 255 349 349 155 155 148 148 372 372 245 245 458 458 208 190 487 487 278 258 31 342 342 86 105 196 217 473 459 459 271 271 39 433 433 160 168 89 55 322 67 394 76 310 338 338 400 400 400 400 30 324 422 186 162 68 68 115 470 470 120 240 240 314 310 338 400 400 400 400 301 378 345 141 141 281 281 9 221 336 144 106 496 88 319 146 135 339 76 36 377 87 87 416 259 445 180 443 443 240 325 34 44 44 44 251 251 241 431 278 26 302 302 497 497 122 32 401 82 144 498 498 498 498 498 139 302 293 497 497 497 122 393 155 155 165 165 165 165 466 466 448 448 464 255 38 38 162 68 115 273 273 265 265 85 85 146 146 175 81 81 242 203 399 70 65 410 410 410 410 173 280 29 29 406 467 89 446 67 58 72 72 72 496 496 496 215 215 35 96 96 272 449 242 275 275 275 303 195 199 335 188 188 340 340 466 22 283 455 38 162 232 105 105 336 491 336 190 380 288 288 288 328 328 200 303 303 117 48 78 491 491 491 491 421 421 491 201 193 193 193 193 +17 17 17 363 363 51 51 491 184 373 373 66 232 68 172 115 273 84 84 16 16 274 274 399 70 473 65 329 329 460 169 169 164 164 164 485 485 485 374 88 89 89 446 212 131 106 
111 111 111 111 85 438 58 110 110 202 202 202 402 221 36 108 119 437 405 405 206 178 35 96 96 272 277 191 325 34 180 410 410 410 410 410 410 173 280 29 334 382 59 59 245 335 14 287 284 405 405 206 169 349 352 25 242 242 242 116 94 199 106 426 426 426 426 282 282 388 195 117 335 335 440 145 463 463 463 463 29 382 313 186 162 342 172 115 273 432 432 330 379 379 243 243 243 77 433 433 433 160 112 56 247 247 312 126 292 292 292 292 1 326 326 326 326 101 101 149 149 228 491 491 320 345 152 152 422 422 164 164 106 106 405 405 206 167 35 35 397 152 152 152 314 90 458 445 180 443 443 240 285 44 44 8 259 354 153 153 153 153 387 387 207 207 19 454 417 417 417 417 417 237 237 47 491 47 2 491 2 491 491 2 316 316 316 491 316 316 73 491 289 491 7 217 217 473 329 329 329 329 329 329 164 485 485 485 485 374 141 281 281 9 221 336 445 180 443 240 325 449 176 135 135 200 200 180 230 230 230 230 215 35 192 340 340 340 116 33 33 219 219 219 219 286 286 286 286 334 304 304 304 185 49 323 219 219 152 152 152 236 94 331 84 84 84 84 84 16 274 98 263 13 417 417 417 417 435 225 373 451 451 451 30 30 422 186 162 232 232 68 68 115 273 278 278 178 143 96 96 86 86 86 238 272 41 41 41 41 19 19 454 454 225 225 225 83 83 55 55 55 322 212 34 30 30 30 324 356 356 356 356 281 453 342 242 242 379 379 478 478 68 172 344 344 344 344 344 186 162 54 482 142 105 336 336 190 499 499 499 499 85 85 146 146 464 253 253 253 368 342 342 451 30 30 301 378 43 276 174 174 319 319 348 379 77 77 9 142 397 336 276 276 346 346 346 387 355 355 355 37 185 185 269 433 433 112 427 82 247 312 126 292 292 326 326 326 326 326 326 326 326 101 101 149 149 228 491 451 257 257 257 31 342 142 72 72 437 306 306 306 306 306 396 167 167 457 32 401 259 161 161 487 499 151 481 215 215 29 302 497 497 71 71 342 342 57 57 57 57 203 53 44 44 44 416 239 458 484 484 484 484 314 32 32 259 384 371 213 213 213 286 286 139 139 302 375 375 98 98 13 417 229 491 247 126 126 126 292 326 326 326 408 408 408 149 149 228 491 289 491 83 55 55 55 322 67 212 219 219 152 152 152 236 10 309 331 331 84 84 16 274 88 58 72 268 268 268 268 268 268 274 32 32 401 401 401 384 371 443 443 443 150 150 86 105 105 336 29 29 288 313 285 131 72 72 437 306 306 306 306 306 396 396 24 325 34 177 177 143 77 342 142 221 336 144 180 405 405 206 167 167 35 36 377 377 87 87 8 354 420 420 420 246 246 252 325 87 87 416 458 445 180 443 443 240 325 58 72 72 72 437 265 265 85 85 468 468 468 396 313 24 131 58 72 110 351 139 139 139 139 293 375 375 233 233 233 419 229 491 247 312 15 15 15 15 15 15 193 193 193 193 193 17 +17 17 17 296 296 363 52 52 52 52 52 51 51 51 51 184 491 491 412 0 0 222 356 356 281 9 196 196 479 331 463 463 463 463 29 29 382 245 335 14 14 226 226 226 226 209 209 475 475 475 475 475 475 475 324 301 8 354 180 151 240 240 325 41 324 324 422 36 377 87 87 354 420 420 420 324 3 58 72 72 110 110 486 486 486 460 282 37 24 35 259 159 159 159 236 35 198 127 114 84 496 496 274 186 162 482 482 482 482 482 238 6 272 371 485 374 374 374 8 354 29 191 191 191 37 24 131 472 225 72 72 72 110 110 486 486 460 460 460 169 352 352 402 221 401 336 79 79 288 84 496 496 413 348 250 250 81 278 278 285 285 302 302 497 497 349 234 234 234 261 190 380 288 288 330 64 64 76 310 107 447 447 221 336 354 153 153 153 153 387 387 304 304 185 185 269 433 160 112 427 491 247 312 126 292 292 292 292 292 326 326 326 408 408 408 149 149 228 491 412 83 55 55 55 322 212 34 34 253 253 31 162 482 482 115 485 485 374 374 374 339 94 199 253 253 253 253 453 9 219 219 152 152 152 301 236 239 384 371 371 374 374 132 132 416 416 445 445 
180 443 240 385 131 133 133 364 276 174 174 174 319 348 33 250 394 212 465 190 380 380 496 496 274 143 458 192 192 340 340 33 394 108 377 123 123 219 222 222 222 222 245 245 43 43 276 109 109 403 403 403 207 318 318 318 49 342 168 89 89 116 33 76 108 119 437 437 405 405 405 206 167 167 457 35 77 68 342 273 231 231 231 203 53 76 198 164 214 214 214 328 200 200 117 229 229 247 126 126 326 326 408 408 391 228 491 491 373 451 30 30 356 356 368 453 168 180 230 230 230 230 230 215 35 35 35 401 89 89 89 446 67 212 131 106 106 284 405 405 206 169 349 402 402 6 272 123 123 216 216 22 283 455 251 241 431 405 405 405 215 215 169 270 86 238 6 300 382 382 245 458 445 445 351 351 365 365 365 365 388 94 199 495 495 406 337 41 41 318 318 49 9 168 157 157 157 467 313 313 216 22 22 283 38 162 232 232 238 6 272 470 470 171 171 171 358 358 358 233 270 270 433 433 433 160 18 112 439 439 439 439 237 237 237 237 237 237 237 237 80 80 491 435 435 412 83 415 415 415 131 393 234 261 261 25 498 498 498 498 498 396 186 39 54 86 238 6 472 472 196 217 217 473 65 486 486 460 460 169 164 164 485 485 485 152 152 422 162 323 224 224 494 494 236 36 310 395 470 470 151 151 150 39 86 342 272 191 191 191 314 90 401 259 445 180 443 240 325 176 135 200 200 199 44 44 44 58 72 72 72 350 350 350 350 350 413 413 53 250 250 212 354 354 153 153 153 387 387 387 207 207 19 454 229 82 247 126 126 126 126 326 326 326 326 326 326 326 326 326 101 408 408 149 391 491 289 289 289 320 159 159 159 240 285 111 111 111 111 438 438 186 162 342 68 273 470 120 240 240 314 196 196 309 479 331 331 84 84 84 16 16 274 274 349 349 234 234 261 425 386 431 376 376 460 167 167 36 108 377 123 123 216 127 114 92 92 92 92 92 92 282 385 385 233 227 419 439 417 417 237 237 237 421 491 421 421 491 491 128 491 128 193 193 17 +17 17 17 296 211 211 52 52 52 363 363 363 408 51 51 228 491 491 320 114 0 0 0 301 399 473 65 476 476 476 171 252 8 420 420 420 324 464 106 297 297 297 297 297 293 293 42 42 42 147 380 499 499 499 428 428 85 85 85 207 207 358 24 34 34 111 111 319 203 53 10 479 307 307 307 307 61 167 167 478 478 68 68 172 115 470 403 403 403 403 135 135 135 200 248 212 127 114 222 222 468 313 313 10 10 479 331 307 307 307 307 426 426 206 206 206 385 24 227 419 419 439 439 225 225 225 225 225 80 80 289 320 159 159 159 159 236 35 196 196 479 331 231 231 231 16 274 274 274 251 251 241 431 431 319 319 348 64 64 212 34 242 242 116 394 478 162 482 482 238 6 161 487 487 213 213 252 252 335 14 411 145 145 486 486 468 468 467 467 467 134 215 8 270 270 86 9 142 393 155 332 332 332 245 399 217 429 429 429 429 246 246 246 19 19 454 454 225 225 417 417 80 80 412 287 111 111 111 438 438 186 162 342 68 115 273 470 120 120 120 37 24 24 404 427 229 491 247 312 126 292 292 292 326 326 326 408 408 408 391 228 228 289 289 289 320 209 445 278 278 278 314 314 196 217 429 429 429 429 464 464 44 44 44 10 10 10 309 479 331 171 171 171 252 325 34 494 173 402 402 221 259 354 354 153 153 153 387 372 396 285 415 415 415 415 457 26 251 241 431 444 444 213 213 246 358 358 39 433 433 86 86 6 6 227 419 439 417 417 237 237 237 237 491 491 491 47 47 491 491 491 316 316 316 73 73 80 435 412 114 0 139 139 139 293 293 8 420 420 420 420 464 44 44 44 44 42 42 147 147 380 288 278 278 278 271 271 39 342 86 105 105 144 472 196 196 331 331 231 231 274 399 217 473 65 486 460 240 285 300 382 245 58 72 72 489 489 374 132 132 8 152 152 152 152 324 416 458 445 445 180 120 120 120 37 385 233 227 419 427 229 491 247 312 126 292 292 292 292 292 326 23 23 326 326 326 101 101 101 149 149 228 491 289 320 159 159 
285 285 106 111 111 284 481 293 169 349 205 205 25 485 485 485 139 139 293 497 335 14 411 411 213 213 213 213 213 318 368 368 453 168 41 324 485 382 406 467 467 340 340 340 116 33 250 70 46 46 46 46 438 438 399 217 70 65 480 480 480 480 480 480 85 299 299 299 299 339 64 212 89 89 322 116 394 478 478 232 232 68 26 26 81 444 213 252 215 129 401 478 232 68 68 115 273 470 315 315 315 450 450 413 64 64 131 300 382 382 467 415 415 236 35 196 196 479 331 265 428 428 85 146 358 233 270 342 224 118 118 118 118 402 345 152 152 152 458 445 180 443 443 285 34 44 44 8 32 259 354 153 153 153 372 372 467 467 299 394 76 465 445 351 351 116 94 199 331 171 171 171 171 252 325 34 324 324 464 275 275 275 303 48 48 417 417 417 491 237 237 237 491 421 421 421 491 128 128 491 128 491 305 128 128 193 193 17 +17 17 17 296 305 317 317 491 317 491 317 461 491 461 461 435 435 491 435 435 491 491 435 289 373 66 68 115 273 273 84 16 88 88 109 340 340 340 466 466 22 283 448 448 448 464 464 432 432 432 330 330 388 195 64 131 133 345 152 152 152 422 314 239 371 490 490 38 342 68 115 273 106 265 265 265 85 146 146 325 34 191 191 191 37 314 36 377 87 87 87 14 14 145 145 376 376 460 460 150 150 342 105 221 336 96 196 217 473 258 258 31 342 224 494 494 494 31 162 232 86 105 336 354 470 432 432 330 379 64 77 77 224 224 300 334 334 59 313 313 36 377 87 87 87 129 74 74 351 278 416 416 144 180 180 151 240 368 453 342 168 180 113 113 113 113 450 167 35 131 133 133 364 364 276 174 174 174 174 348 348 195 195 250 250 345 409 409 409 116 64 76 310 338 400 400 30 301 378 43 345 109 109 330 330 64 76 449 449 180 410 410 410 410 8 29 29 382 313 236 36 377 87 87 416 458 445 180 443 240 385 131 58 156 156 156 156 313 313 251 251 81 431 278 285 26 302 302 497 497 416 458 144 498 498 498 498 498 134 302 375 375 98 98 13 229 229 491 312 312 126 292 292 292 326 326 326 326 326 326 326 326 326 326 326 326 326 326 326 101 101 101 101 149 149 228 491 491 320 152 152 152 422 58 58 72 498 498 498 498 396 313 314 35 26 241 241 376 376 376 460 169 150 86 86 6 272 472 397 354 109 213 213 213 358 143 458 96 99 338 400 400 400 301 378 8 141 141 281 281 9 221 221 144 180 84 84 496 88 88 176 176 176 328 328 200 117 117 454 454 439 78 491 491 312 126 126 326 326 326 101 408 408 149 228 491 373 66 66 115 273 84 84 16 43 43 345 152 152 152 422 162 68 68 115 273 273 432 330 330 64 131 131 183 156 156 156 156 156 156 245 43 43 364 276 276 109 498 498 498 59 396 313 24 131 472 259 354 62 62 62 62 438 438 42 147 380 288 329 329 36 107 395 300 382 313 314 478 478 478 172 105 336 470 432 432 330 330 33 394 77 54 107 395 382 382 313 186 31 54 142 393 336 25 25 496 496 496 496 274 215 233 270 270 342 224 415 415 325 472 458 144 27 437 437 306 306 306 396 396 53 53 469 469 24 325 41 41 41 324 422 36 108 377 87 87 8 239 190 380 288 360 360 200 200 464 459 271 31 342 224 44 44 38 162 232 232 482 105 105 196 70 65 65 306 306 306 396 396 385 131 472 225 225 225 225 225 225 7 251 241 431 266 266 266 266 146 178 35 35 401 26 359 359 166 166 324 301 8 129 354 354 153 153 153 153 387 387 387 207 464 464 464 69 130 130 280 255 255 236 8 354 180 113 113 113 113 113 450 167 167 457 401 401 401 75 108 119 351 351 351 432 330 388 199 199 495 495 406 467 134 302 251 251 241 431 443 443 443 173 280 29 275 275 275 303 303 303 48 13 229 491 491 312 312 312 292 292 292 292 292 21 21 21 21 21 21 21 21 21 408 408 149 149 149 491 491 491 320 152 152 152 422 143 384 490 490 490 31 342 68 115 273 470 265 265 428 85 146 146 325 325 191 191 191 191 314 314 198 127 114 114 92 92 92 167 457 
364 345 389 389 314 129 259 354 420 420 420 301 216 22 283 455 236 259 354 180 443 443 443 169 150 150 39 342 86 238 272 371 470 93 171 171 171 358 358 233 310 107 107 112 439 417 417 237 237 128 193 193 193 +17 17 296 296 296 184 184 412 209 287 424 424 424 424 424 274 274 122 285 34 34 242 116 479 331 230 230 230 169 349 402 96 36 377 87 87 87 129 354 420 420 420 420 246 3 464 223 223 130 402 478 232 232 232 172 115 273 231 231 231 231 203 53 53 219 219 219 219 219 485 374 374 132 132 186 39 54 342 224 89 340 116 33 394 212 384 371 374 374 88 88 176 176 135 200 200 248 76 465 310 107 395 395 441 441 153 153 153 182 372 372 372 372 304 304 185 185 269 269 9 142 97 397 336 147 380 499 499 428 85 146 146 325 34 106 106 106 426 426 426 426 206 169 169 352 352 352 352 352 352 97 97 225 225 225 83 55 55 55 322 67 64 212 219 219 219 464 180 180 319 319 348 200 464 242 116 94 331 230 169 169 402 402 6 377 87 87 420 420 420 422 422 129 310 161 161 487 487 288 288 290 290 434 434 339 64 212 131 180 230 230 230 167 167 457 401 401 491 190 190 190 488 488 488 405 206 215 215 35 29 334 334 59 59 452 452 263 229 491 247 312 126 292 292 292 1 1 1 1 21 21 21 21 21 21 21 260 260 260 260 391 391 391 491 491 320 345 152 152 152 301 399 217 473 360 360 360 434 339 64 64 108 377 87 87 416 445 485 278 173 280 57 57 57 53 473 44 44 44 416 129 259 144 484 484 484 285 131 58 72 72 72 437 350 350 350 350 350 413 203 381 335 335 14 440 145 194 446 446 33 394 478 478 482 482 482 482 105 336 208 441 153 153 153 182 182 175 81 176 176 328 328 303 117 48 417 417 417 417 237 237 237 491 47 80 491 80 491 7 7 152 152 152 58 58 110 110 254 254 240 34 44 44 236 36 108 119 119 351 486 139 175 175 81 81 469 416 8 79 380 288 288 365 365 282 203 203 53 394 393 155 155 332 332 165 399 217 473 258 258 258 31 342 224 494 494 368 453 168 168 145 329 329 329 175 81 81 469 416 416 453 453 470 365 365 365 365 388 64 212 300 382 382 313 186 54 54 105 336 354 470 432 330 379 379 77 77 54 224 300 334 313 236 36 377 377 87 236 236 93 93 93 93 93 93 207 207 207 207 19 454 229 247 247 126 126 126 326 326 326 326 326 326 326 326 101 101 149 149 491 289 491 127 5 5 455 399 217 473 65 290 290 171 139 139 139 293 293 399 217 65 136 136 136 136 282 388 33 394 32 259 354 190 380 499 405 405 206 206 285 449 34 277 277 24 314 393 155 155 165 165 165 165 466 22 22 283 38 162 342 238 6 272 470 470 171 171 171 358 99 436 436 60 60 298 298 303 303 117 48 229 491 247 126 126 326 326 326 408 408 408 149 228 491 373 66 68 68 68 273 470 403 403 403 403 207 135 135 135 200 200 248 212 127 0 0 0 0 378 378 347 347 347 347 245 143 458 144 27 437 437 319 319 319 53 53 176 176 135 328 200 200 199 125 125 125 125 348 466 283 455 38 349 234 234 261 25 346 265 265 85 85 146 146 438 349 349 234 234 261 164 273 498 498 498 313 285 34 41 324 324 422 143 259 161 161 161 487 487 288 290 290 290 434 434 434 339 394 36 377 377 87 236 10 479 331 331 428 428 428 428 207 207 358 358 233 465 227 419 439 78 421 491 491 193 193 17 +17 17 17 296 296 184 184 184 435 435 66 172 115 273 273 84 344 16 274 399 399 473 65 486 486 486 460 460 169 164 164 164 485 485 485 301 378 43 364 109 109 189 330 330 64 76 465 377 123 123 236 32 259 354 190 380 499 428 428 85 146 146 35 35 133 133 147 288 288 278 173 280 29 29 382 313 236 108 377 87 87 399 217 473 213 213 213 252 325 325 183 57 57 57 57 203 381 117 404 13 229 491 247 312 126 292 292 292 292 292 292 21 326 326 326 408 408 408 408 149 149 228 491 320 217 473 258 258 258 342 342 224 494 494 494 258 31 162 232 232 68 68 105 105 336 
+[raw data file content: space-separated integer label sequences (values 0–499), one sequence per line; data not reproduced here]
472 472 472 472 401 75 310 107 395 180 329 426 426 206 348 76 465 26 26 359 359 359 474 474 324 464 464 255 255 255 43 364 276 109 109 403 403 403 207 207 207 19 19 454 197 197 80 80 321 321 320 7 354 420 420 420 360 360 360 135 135 135 200 200 248 58 58 72 437 319 319 319 348 348 64 248 212 79 495 334 41 41 41 19 454 229 247 247 126 126 126 326 326 326 326 326 326 326 326 326 326 101 101 101 149 149 149 228 321 412 83 194 194 194 194 322 388 67 466 127 448 448 448 464 319 319 319 348 90 90 205 261 25 148 148 148 387 396 186 310 107 60 60 298 94 11 11 11 457 457 217 217 473 65 486 486 486 460 460 169 169 164 164 485 485 485 485 485 374 132 43 43 345 141 141 141 281 342 26 26 251 241 431 443 443 169 169 402 402 6 272 377 87 87 236 239 239 384 371 371 374 374 374 374 132 132 132 132 132 132 132 132 197 197 321 127 114 114 92 92 92 92 92 460 167 385 35 75 227 472 397 397 345 407 407 407 407 407 310 447 397 397 141 141 141 281 54 9 142 72 72 72 437 306 306 306 306 396 396 285 300 382 382 349 205 155 332 332 332 58 58 183 183 183 57 57 57 57 203 53 381 381 195 394 212 198 127 114 89 446 446 67 394 394 32 32 32 401 401 321 75 354 485 485 286 286 286 468 396 313 325 325 176 135 135 200 200 199 44 44 44 251 251 251 251 241 241 431 265 480 480 480 85 85 85 146 464 464 275 275 388 94 199 340 340 199 199 154 154 154 36 77 342 342 86 221 336 321 384 384 371 93 120 120 120 120 330 388 303 195 195 303 117 404 404 78 78 491 491 312 312 292 292 292 12 12 12 23 23 260 260 260 260 260 391 391 391 491 289 289 321 7 7 7 364 276 276 346 346 405 405 405 206 178 35 35 458 192 180 230 230 230 230 215 215 35 96 401 75 108 377 123 123 123 88 44 44 44 416 416 239 144 79 498 498 498 498 498 498 134 302 375 375 375 375 98 98 98 13 417 417 417 417 237 237 47 47 47 491 491 80 80 491 289 321 321 287 287 44 44 44 38 162 232 482 482 482 238 6 161 161 79 487 288 290 290 290 434 434 339 339 310 107 447 221 144 79 498 498 498 498 498 302 302 375 375 98 98 225 483 226 209 44 44 44 44 33 335 14 14 411 411 153 153 372 372 372 396 349 349 234 261 261 242 242 116 116 33 90 90 212 239 79 79 498 498 498 498 134 302 375 375 375 98 13 229 321 247 15 15 15 193 193 193 17 +17 17 17 363 363 363 51 149 228 228 321 83 83 194 194 194 194 322 67 64 212 212 34 44 44 44 217 217 473 65 486 365 365 460 330 388 64 212 131 34 223 223 130 402 402 156 156 156 156 59 59 59 245 245 43 43 364 276 346 346 346 346 346 265 85 85 146 438 186 338 338 400 400 30 378 378 345 141 141 141 141 281 453 242 242 116 64 131 34 44 44 8 354 354 153 153 153 153 387 387 387 387 207 207 207 98 48 417 417 417 417 170 170 491 28 28 28 28 2 2 2 491 2 2 491 2 491 2 2 2 366 366 491 316 316 316 316 491 73 491 289 321 7 7 217 217 473 65 486 486 486 486 460 460 169 169 169 164 164 164 219 219 219 485 485 374 374 132 132 32 32 321 208 208 79 79 380 380 288 84 496 496 496 496 274 274 413 413 413 413 64 212 212 34 34 340 340 116 33 394 478 478 232 232 232 232 232 105 105 336 354 470 286 278 498 468 468 468 468 468 467 277 385 325 449 34 253 253 253 453 168 30 30 30 422 129 75 108 119 308 308 308 308 308 308 308 396 313 64 212 131 255 255 8 354 180 113 113 113 113 113 450 450 413 24 36 449 89 89 116 33 33 394 90 338 338 338 338 338 338 395 189 151 151 169 349 349 352 29 302 302 302 302 497 122 122 122 314 401 401 401 321 310 107 107 395 432 432 432 330 379 64 76 36 26 26 359 474 474 474 324 301 239 384 371 180 315 315 315 450 450 413 466 466 22 283 455 455 259 74 425 386 431 486 486 460 460 167 35 393 205 321 155 148 148 148 387 387 203 53 90 90 75 119 441 441 153 153 153 153 372 
372 37 314 77 77 342 9 224 156 156 156 156 59 452 452 263 229 247 247 312 312 312 292 292 292 292 292 292 292 1 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 260 260 260 260 408 408 391 391 321 321 373 373 400 400 400 30 30 58 110 254 254 254 254 314 35 259 137 137 137 137 399 250 250 276 346 346 346 206 206 240 310 310 107 395 176 135 135 200 248 183 57 57 57 57 57 53 473 335 14 411 145 463 463 463 463 29 29 382 313 186 186 162 68 68 273 278 278 278 330 379 394 77 342 342 451 30 30 30 464 254 254 254 131 129 321 74 437 311 311 311 311 311 311 460 169 150 86 86 238 6 272 272 334 334 334 59 59 452 452 229 321 247 126 126 126 292 292 292 326 408 408 149 149 228 321 321 209 83 55 55 322 322 67 310 400 400 400 30 30 3 58 72 72 110 110 254 254 254 240 131 58 183 156 156 156 156 245 335 14 411 265 265 265 265 265 85 85 146 318 368 342 168 168 125 125 125 125 348 199 183 183 57 57 57 57 53 53 10 10 479 331 331 315 315 315 315 315 450 450 450 450 98 98 13 13 78 170 170 170 170 28 28 2 491 2 2 491 2 2 2 2 316 491 316 316 316 316 289 289 289 321 7 7 217 473 329 329 329 329 329 329 329 164 164 485 485 485 301 378 378 345 141 141 141 281 9 238 221 196 479 307 307 307 61 167 167 457 457 251 251 241 367 367 367 367 458 192 176 135 135 200 200 464 464 415 415 415 415 415 240 285 156 156 156 156 59 452 452 229 321 247 312 15 15 15 15 193 193 193 17 +17 17 17 296 491 317 317 491 491 184 184 184 321 320 412 44 44 44 44 36 310 107 107 395 437 91 91 91 91 85 85 139 139 293 122 122 34 69 223 130 280 44 44 8 8 354 180 113 113 113 285 285 34 44 251 251 251 241 241 431 443 443 173 173 280 242 275 275 116 195 117 117 48 417 414 170 491 170 491 321 211 211 312 312 292 292 326 326 326 326 23 101 101 101 149 391 491 289 289 321 144 144 106 499 306 306 306 306 306 396 396 215 215 96 36 272 272 340 340 94 199 44 44 44 4 280 104 104 104 104 104 468 337 337 337 337 422 422 99 338 338 338 338 395 395 153 153 153 153 372 372 396 385 385 36 227 419 225 225 80 80 491 321 321 7 7 32 4 104 104 104 104 104 468 337 337 337 324 422 143 36 108 119 119 437 437 265 265 428 428 428 146 358 358 233 75 227 419 225 225 225 80 80 80 80 7 7 4 280 104 104 104 468 468 337 337 337 324 3 14 14 411 411 284 319 319 240 416 416 96 134 134 359 359 81 166 166 324 301 236 239 161 161 79 288 288 151 151 271 271 39 342 342 342 224 462 462 462 462 402 402 219 219 219 219 180 180 443 139 175 175 81 84 496 88 88 109 459 459 459 99 99 447 447 447 221 336 144 208 79 380 288 403 403 403 171 324 3 301 301 43 364 364 345 109 278 278 116 33 394 77 77 342 68 342 224 41 41 41 41 19 19 454 229 321 247 247 312 126 292 292 292 292 292 292 23 23 23 23 23 23 23 260 260 260 260 260 260 391 391 228 321 373 373 400 400 400 400 301 378 43 364 276 210 210 210 372 372 372 467 44 44 44 349 349 234 234 261 261 25 470 171 171 171 171 252 252 325 34 191 191 191 314 131 472 401 401 321 259 190 190 380 380 315 315 315 315 450 450 450 413 348 394 478 478 232 232 172 115 273 470 171 171 171 252 175 81 300 300 382 245 58 58 72 110 110 486 486 486 460 460 460 169 385 233 227 227 419 225 225 225 225 412 412 83 55 55 322 67 212 401 321 354 255 255 116 94 94 398 213 213 213 213 252 164 164 164 164 164 283 283 455 72 72 72 110 486 486 486 486 460 282 282 385 385 227 227 419 427 321 247 126 126 326 326 326 326 101 408 149 149 321 412 154 154 154 143 96 96 66 232 68 238 6 272 470 432 432 432 330 64 64 64 212 176 135 135 200 200 248 248 212 384 180 315 315 315 315 450 413 348 199 58 156 156 156 156 156 245 8 8 354 180 376 376 376 376 376 376 460 460 169 178 233 321 208 133 397 
345 347 347 313 313 236 36 36 108 119 119 485 374 374 374 374 132 132 132 8 259 259 190 190 380 288 288 403 171 171 171 246 246 318 24 24 270 270 342 168 168 462 462 4 4 4 4 104 104 104 104 104 468 337 337 337 324 324 422 349 164 164 164 164 164 25 278 278 278 278 278 178 143 321 192 192 419 225 47 491 80 80 80 80 321 75 371 490 490 490 162 232 232 68 68 115 273 273 265 265 265 428 146 146 325 34 191 191 191 314 26 26 359 166 166 166 324 301 42 42 42 147 147 147 380 288 288 443 120 120 120 37 37 24 24 131 404 225 225 225 80 321 373 72 72 72 110 264 264 264 264 264 264 59 59 59 59 452 263 13 78 170 170 491 421 491 491 211 491 421 491 15 15 15 193 193 193 17 +17 17 17 296 363 363 363 52 52 52 52 52 408 51 51 51 184 289 321 320 156 156 156 156 245 349 205 205 261 343 343 343 343 343 343 252 186 39 342 86 86 142 397 141 141 141 281 162 232 232 232 482 482 105 105 196 70 65 65 481 481 481 481 182 182 182 375 375 375 98 98 98 225 225 80 80 491 80 80 321 7 7 364 276 276 346 346 428 428 428 428 146 146 358 358 233 321 227 227 419 225 89 483 321 188 89 446 446 33 394 394 76 164 164 164 164 164 278 278 278 278 120 330 303 303 303 303 117 48 48 417 47 47 491 491 80 80 80 80 289 321 320 287 287 297 297 297 297 293 293 186 162 54 68 115 224 273 84 84 84 274 399 399 70 70 383 383 383 383 167 310 107 447 447 447 393 234 261 25 380 288 151 178 178 458 458 208 302 302 302 302 375 375 122 122 227 419 419 427 82 321 312 312 126 292 292 292 292 23 23 23 101 101 101 149 228 228 321 320 373 156 156 156 245 399 217 217 70 473 65 315 315 315 315 315 450 450 293 169 352 352 352 352 97 397 397 345 345 141 141 281 453 9 26 26 251 241 241 431 284 306 306 306 306 306 306 396 396 396 37 233 36 310 107 107 447 18 97 97 225 321 412 83 55 55 446 67 394 478 66 172 115 273 273 84 410 410 410 43 29 347 347 245 245 58 156 156 156 245 14 14 14 411 287 265 265 265 265 265 265 85 85 85 207 207 318 185 269 433 433 160 97 397 397 345 407 407 407 407 310 107 447 447 26 251 241 367 367 367 367 458 96 96 272 472 472 221 401 321 321 208 79 79 288 288 360 360 360 360 434 434 339 199 340 340 340 116 33 394 478 68 68 172 115 273 319 319 319 203 53 53 251 251 241 431 428 428 428 428 146 146 385 77 77 342 224 89 89 116 33 250 217 217 473 65 374 374 374 132 132 37 37 24 321 270 433 160 112 427 229 247 312 126 326 326 326 101 101 149 149 321 412 412 83 55 55 446 446 67 131 472 472 458 208 208 79 79 380 288 403 403 403 403 207 324 464 464 464 446 116 94 199 493 493 493 493 493 216 300 334 334 304 304 304 185 185 269 323 18 112 112 56 56 56 170 170 28 28 28 491 491 28 28 362 491 362 362 362 491 491 362 362 362 362 40 40 362 218 491 491 305 305 366 366 366 305 366 435 435 435 435 435 435 321 435 435 373 373 66 68 172 115 344 344 344 344 344 274 274 349 205 261 25 106 306 306 306 353 396 313 216 22 448 448 448 14 411 411 153 153 153 387 387 313 314 196 196 398 134 134 468 337 337 337 464 464 255 255 215 96 368 453 453 168 273 498 498 498 396 173 29 29 334 334 59 452 452 263 13 229 491 442 312 312 312 292 292 292 292 292 326 326 326 326 326 326 101 408 149 228 321 305 209 287 44 44 94 199 154 154 154 96 96 482 482 238 6 161 79 153 153 153 153 387 396 240 314 196 309 199 264 264 264 468 468 468 337 337 464 464 464 255 255 215 96 478 342 9 168 470 498 498 498 396 173 173 280 29 334 334 59 452 452 229 321 247 15 193 193 17 +17 17 51 51 228 289 321 7 70 70 65 428 428 428 146 146 325 449 202 202 202 202 402 162 232 172 172 267 267 267 267 267 434 434 339 248 248 212 45 45 45 45 198 22 5 455 236 129 321 310 107 107 395 395 278 278 330 116 195 250 250 
345 141 141 281 281 453 9 142 4 4 4 104 104 104 104 104 468 337 337 324 301 143 129 401 321 74 74 441 441 441 441 441 387 360 360 360 252 339 76 465 449 191 191 191 191 24 325 89 89 446 33 394 76 74 190 492 492 492 313 94 94 331 331 315 315 315 315 450 450 450 413 413 243 243 77 433 86 238 6 6 227 427 427 491 247 126 126 292 326 326 326 326 101 408 149 228 228 321 320 321 127 45 45 45 45 35 35 127 5 5 455 236 129 259 354 278 278 278 278 416 416 192 180 106 265 265 265 265 85 85 85 146 318 49 9 142 397 347 347 347 347 245 349 349 205 261 261 25 424 424 424 175 81 462 462 462 130 402 478 162 232 232 232 232 105 105 336 336 354 485 278 498 468 468 467 277 277 469 325 449 89 89 446 53 394 212 280 106 265 428 428 146 438 173 280 280 486 486 486 460 169 150 150 342 342 224 469 469 325 449 41 41 41 41 19 19 19 454 454 454 78 491 170 491 312 187 187 292 12 408 408 408 149 228 321 321 127 45 45 45 45 35 198 22 5 455 399 217 473 65 315 315 315 315 450 450 450 169 169 164 164 397 397 397 141 141 141 281 31 162 232 232 68 68 482 397 397 397 109 213 213 213 252 252 36 26 26 26 251 241 431 278 278 278 215 215 35 96 96 465 272 89 89 446 67 131 34 154 154 458 96 96 54 142 105 336 336 190 380 288 151 151 169 150 342 342 224 224 494 459 459 459 459 37 173 352 352 352 427 491 247 126 126 126 326 326 326 326 101 101 101 149 149 228 321 321 320 127 45 45 45 45 35 35 198 22 5 455 349 205 234 234 261 25 148 148 148 148 372 372 372 396 58 72 72 110 110 120 120 120 120 240 24 24 133 133 364 345 141 141 141 141 281 281 9 142 221 336 336 259 190 190 380 380 499 499 499 405 426 426 426 426 206 206 206 37 24 34 89 89 446 116 394 90 393 234 234 234 234 261 25 441 424 424 424 182 182 375 375 375 98 98 13 13 417 170 170 47 491 47 491 491 491 2 491 491 316 73 289 289 320 412 188 188 340 340 116 33 394 478 338 338 338 338 395 470 153 153 153 153 387 372 396 396 385 233 227 227 419 439 417 78 47 47 491 47 47 491 80 321 321 80 321 435 209 287 287 353 353 353 353 396 313 236 36 384 490 490 490 31 162 68 115 115 273 308 308 308 396 313 94 94 176 176 135 328 200 200 199 255 154 154 129 401 321 96 66 482 238 272 79 153 153 153 387 387 396 314 196 196 479 398 398 264 264 468 467 467 255 255 215 96 478 342 68 115 273 498 498 498 498 396 173 173 29 29 334 334 59 59 452 263 229 247 247 126 126 326 326 326 326 326 326 326 101 101 101 149 149 228 321 289 320 7 70 65 65 389 428 428 146 240 325 34 202 202 202 402 221 458 27 121 121 121 33 394 76 259 208 208 386 386 386 444 374 374 374 252 325 34 191 191 191 37 24 404 427 229 247 193 193 17 +17 17 17 363 51 51 184 320 320 345 333 333 220 220 402 66 66 68 115 344 344 344 344 274 274 251 251 241 431 374 374 374 374 285 34 469 469 143 458 208 79 459 459 271 31 342 86 26 26 166 166 166 464 464 464 255 255 349 234 234 261 190 380 288 288 403 403 403 207 207 207 37 24 24 404 439 417 417 417 170 170 28 28 491 28 491 362 491 491 362 491 362 491 491 491 362 362 491 362 362 491 40 211 369 369 369 369 21 21 21 21 21 21 21 408 408 408 149 149 228 321 321 320 7 217 70 65 486 486 486 460 460 169 169 164 164 164 485 485 485 485 374 132 132 58 58 72 268 268 268 268 268 88 88 109 84 463 463 463 173 280 29 334 59 59 452 263 263 417 417 417 417 80 321 321 7 7 345 141 141 141 281 162 232 232 232 482 105 336 336 470 470 264 264 264 264 468 468 313 313 314 314 198 22 448 448 448 464 106 106 372 372 372 313 236 236 36 371 485 213 286 286 286 139 302 302 175 175 69 223 223 130 478 232 232 105 105 336 321 354 470 213 213 252 143 192 176 135 135 135 200 248 248 248 393 205 261 25 498 498 498 498 498 396 271 271 
39 54 86 238 6 427 427 247 247 126 126 326 326 326 326 101 101 101 149 228 321 321 373 155 155 155 332 332 332 372 372 372 467 253 253 38 162 232 172 172 115 485 374 374 374 348 94 199 253 253 253 99 338 400 400 400 30 422 143 144 27 121 121 121 394 76 76 208 208 386 386 444 444 374 374 252 325 191 191 191 37 314 198 198 45 45 45 183 183 451 30 30 301 378 345 141 141 281 342 342 221 336 144 27 27 351 319 319 319 53 53 176 135 135 200 248 76 75 108 377 123 123 123 123 132 58 156 156 156 156 59 59 452 229 229 247 126 126 326 326 326 326 101 101 408 149 228 321 321 373 400 400 400 30 422 422 162 482 482 482 482 238 272 189 189 189 189 285 34 230 230 230 230 230 215 215 35 74 419 439 439 78 78 47 47 80 80 80 289 289 320 208 79 499 486 486 460 460 169 150 342 105 105 336 354 176 176 135 135 200 248 248 333 333 220 220 220 142 133 364 276 174 174 174 174 319 319 348 348 195 195 90 90 90 393 234 234 234 234 261 261 25 470 278 278 278 330 330 388 195 195 195 250 250 394 32 32 259 354 190 380 380 315 315 315 315 450 450 450 413 413 33 58 58 72 72 72 294 294 294 294 294 294 294 294 294 282 282 388 195 64 212 131 427 321 247 126 126 326 326 326 326 101 149 149 228 321 320 22 5 455 455 72 72 72 294 294 294 294 294 388 348 64 64 212 212 26 302 302 302 175 69 69 69 130 280 44 44 44 99 338 338 338 338 338 395 470 486 486 486 460 460 215 354 41 324 324 324 3 335 14 226 411 411 424 424 424 424 424 424 274 122 122 131 472 393 234 234 261 25 486 486 486 460 460 169 99 436 436 436 60 242 116 116 33 212 131 472 221 321 144 27 437 437 306 306 306 460 215 35 29 469 277 277 314 401 401 321 354 180 376 376 376 376 376 282 207 37 24 192 192 427 321 247 126 126 23 408 408 408 149 228 321 321 320 127 448 448 448 14 14 411 493 493 493 493 493 216 127 300 334 334 59 452 186 99 338 338 400 400 400 30 422 58 58 72 110 110 139 139 139 293 293 122 122 34 180 113 113 113 113 167 167 36 449 123 123 123 123 183 183 57 57 57 57 57 203 381 381 381 48 48 417 417 417 170 421 421 421 421 491 128 491 128 128 193 193 17 +17 17 17 296 317 317 317 491 491 491 491 491 461 184 321 435 435 321 435 287 287 111 111 111 438 162 342 224 494 494 236 74 470 496 496 496 496 496 274 368 368 9 219 152 152 152 88 353 353 353 245 399 70 473 258 31 54 86 238 272 272 300 245 399 217 473 65 486 460 460 169 164 485 485 485 382 422 458 458 144 27 437 437 151 169 169 164 402 221 401 321 354 29 498 313 313 325 34 462 462 130 402 321 259 79 79 288 360 360 360 200 200 248 445 445 180 171 171 171 252 215 8 354 100 302 375 497 98 185 269 433 390 160 112 112 56 491 312 312 312 187 187 12 12 12 12 12 12 12 23 260 260 260 260 391 391 391 491 321 373 338 400 400 400 400 30 422 162 68 68 115 470 470 120 120 240 314 196 340 116 199 44 44 44 129 259 74 492 236 129 321 445 445 485 485 485 485 485 485 374 374 132 359 81 485 485 134 382 134 359 359 81 166 166 324 422 143 401 401 321 321 144 208 208 208 386 386 386 286 286 286 286 286 286 334 382 59 304 313 186 162 66 482 482 482 482 105 397 336 109 213 213 213 252 143 131 472 393 393 261 343 343 343 343 343 343 343 358 39 39 433 433 160 427 56 247 247 312 126 292 292 326 326 326 326 326 101 101 101 149 149 321 412 287 287 111 111 111 356 356 53 394 212 4 104 104 104 104 104 337 337 337 301 143 144 208 386 431 376 376 376 240 24 36 87 87 87 162 232 172 115 267 267 267 267 267 219 219 477 477 477 477 477 132 13 229 491 247 312 126 292 292 292 23 23 23 101 101 101 149 228 321 412 412 287 111 111 111 378 378 141 141 281 453 142 221 336 420 420 420 416 458 445 485 360 360 360 94 176 135 135 248 76 108 377 87 87 129 354 420 
420 420 464 464 44 255 38 349 205 205 261 487 288 288 288 171 171 252 24 131 219 152 152 152 378 43 345 347 347 372 396 313 457 131 221 458 144 27 351 351 319 319 203 53 176 135 328 200 248 393 205 155 332 332 332 332 245 399 429 429 429 429 19 19 229 247 247 126 193 193 193 +17 491 211 491 296 296 363 363 326 101 101 149 149 228 321 321 287 111 111 111 438 58 110 254 254 254 314 196 196 217 473 476 476 476 252 325 34 230 230 230 215 35 196 196 46 46 46 46 438 399 399 217 70 65 480 480 480 480 85 299 299 339 212 131 427 229 247 126 326 326 326 101 149 149 321 321 320 45 45 45 325 118 118 118 118 402 219 152 152 422 236 239 384 371 278 278 314 196 196 242 242 33 90 465 144 27 351 351 319 319 203 53 394 76 205 155 332 332 332 399 399 429 429 429 422 143 108 377 377 87 236 10 479 331 331 428 265 428 428 428 146 207 358 233 131 419 321 247 15 193 193 +17 17 17 363 363 363 51 149 228 228 321 321 287 287 111 111 111 111 378 378 43 389 389 389 314 242 242 394 394 32 259 420 420 420 420 464 44 44 236 129 354 354 278 278 325 34 300 255 349 349 234 234 261 190 487 288 288 288 403 171 207 207 37 24 131 427 491 247 126 126 326 326 326 326 101 149 149 149 228 321 209 83 55 55 55 55 322 94 199 177 177 177 457 389 389 389 314 259 259 420 420 420 301 301 251 251 251 251 251 251 251 241 266 266 266 266 266 173 402 402 26 359 359 81 166 324 422 36 377 87 87 38 162 232 172 26 26 359 444 444 213 252 8 354 89 340 116 199 44 44 43 43 364 276 346 346 346 265 85 85 85 139 139 293 293 122 122 35 401 401 401 75 310 107 107 107 395 395 351 264 264 264 468 468 406 337 337 324 252 143 36 161 487 487 487 41 324 3 335 14 14 411 297 297 297 297 297 297 297 293 293 497 497 43 43 364 364 276 346 346 428 428 428 146 358 76 449 472 397 397 333 333 220 220 164 142 221 401 321 321 321 354 425 425 431 374 374 374 374 374 132 132 203 203 53 473 340 340 116 466 22 283 455 399 217 70 473 65 65 350 350 413 413 33 33 394 478 338 338 338 395 470 480 480 480 85 299 299 339 64 212 465 384 430 430 430 430 430 465 449 152 152 152 152 349 164 164 214 214 214 214 360 328 328 200 243 233 192 192 419 229 491 312 312 491 187 187 187 201 201 201 201 201 201 201 201 491 201 491 201 201 435 211 211 408 149 321 321 321 219 152 152 152 152 143 458 144 389 389 389 325 34 255 399 217 217 65 486 486 486 460 460 240 310 107 395 242 116 116 219 219 152 152 378 378 347 347 347 236 239 161 397 133 276 109 189 139 139 175 81 176 135 200 464 464 340 116 33 33 250 217 70 70 70 65 306 306 306 306 396 134 215 35 29 100 497 497 497 58 72 72 72 72 437 481 481 481 481 481 481 182 182 182 375 375 375 185 269 342 86 221 336 144 430 430 430 430 430 430 430 430 430 131 449 449 485 152 477 477 374 132 132 13 229 491 247 312 15 15 15 15 193 193 193 17 +17 17 17 296 317 317 317 317 317 491 317 317 461 461 461 461 461 461 461 184 184 184 184 321 320 7 217 217 217 473 329 329 329 329 329 460 169 164 164 164 219 485 485 485 374 132 132 274 58 58 72 110 254 254 254 254 314 401 75 108 119 295 295 295 295 295 143 458 192 242 242 116 466 466 22 283 455 38 162 54 482 482 105 221 336 79 79 499 499 405 206 206 348 199 41 324 324 301 251 241 431 278 278 285 302 497 497 497 58 58 72 110 294 294 294 294 294 294 282 388 64 212 131 335 14 14 411 411 284 405 405 405 206 178 35 35 441 441 109 109 134 313 24 26 26 359 359 474 474 324 464 340 340 340 116 33 58 183 183 257 257 257 257 257 120 50 50 185 185 185 269 433 433 390 18 427 56 247 312 312 126 292 292 326 326 326 326 326 101 101 101 408 149 149 321 289 7 7 7 4 127 361 361 361 361 361 330 388 94 199 89 89 446 116 33 212 212 127 114 
361 361 361 264 264 264 264 468 59 452 452 263 263 417 417 414 47 80 321 321 435 373 451 451 30 30 236 325 490 490 38 162 342 115 273 265 265 428 146 146 325 34 191 191 325 133 133 259 181 181 181 181 167 457 75 108 377 87 87 236 325 371 374 374 374 374 132 98 98 48 48 417 417 170 170 102 102 28 28 40 40 40 40 40 40 40 40 40 40 40 491 362 491 218 366 305 305 491 366 366 40 40 40 40 435 435 435 435 435 373 451 451 30 30 30 422 458 144 27 389 389 389 389 196 196 479 331 307 307 61 61 167 167 457 75 108 119 351 351 139 139 139 293 293 293 216 216 114 258 258 31 54 54 238 238 221 321 310 107 395 395 437 91 91 91 85 85 85 85 450 293 293 122 122 131 133 333 333 220 220 198 22 44 236 129 321 208 208 425 386 241 431 84 496 496 88 88 176 176 176 328 200 200 464 106 265 265 265 265 265 85 85 85 207 318 318 39 433 433 160 427 247 247 126 126 126 326 326 326 326 326 326 326 101 101 149 149 228 321 320 127 45 45 45 45 35 198 114 0 0 222 58 58 110 254 254 254 314 401 321 354 137 137 137 94 44 44 44 217 473 65 258 31 31 342 68 68 68 238 6 272 470 470 470 171 171 171 358 358 358 233 321 192 192 419 419 439 439 78 78 78 491 28 491 28 28 28 2 491 491 2 341 341 341 12 12 12 21 21 21 408 408 149 228 321 321 373 451 451 30 30 30 378 378 389 389 389 389 129 259 108 119 295 295 295 295 295 143 458 192 156 156 156 245 245 58 72 72 350 350 350 350 350 350 413 203 381 53 89 89 322 67 466 241 431 443 167 167 457 196 217 65 329 329 42 42 147 147 380 256 139 175 175 423 423 423 423 236 75 371 371 374 374 132 132 216 127 114 92 92 92 92 92 167 385 243 227 419 439 78 78 170 170 170 47 491 491 2 2 491 2 2 491 491 2 491 316 435 435 316 316 435 321 435 373 338 338 400 400 400 30 422 143 458 144 389 389 389 389 314 242 242 394 76 76 259 420 420 420 301 26 251 241 431 443 443 169 169 402 402 6 272 415 415 385 129 401 321 259 190 380 499 499 428 428 146 146 457 457 147 147 380 288 288 173 173 29 29 495 495 406 467 467 365 330 94 475 475 324 324 58 58 72 110 268 315 315 315 268 450 450 98 98 229 247 15 15 193 17 +17 17 17 363 51 51 228 321 320 309 331 331 231 231 399 399 473 65 486 486 460 240 285 300 382 245 43 364 276 181 181 181 181 167 35 35 196 196 473 258 258 31 342 86 86 6 272 470 470 171 171 252 458 458 192 389 314 314 321 354 137 137 137 399 217 217 473 476 476 476 476 476 476 207 37 24 131 427 229 321 247 312 126 292 292 23 23 23 23 408 408 391 228 228 321 373 66 172 115 273 344 84 274 88 14 14 411 297 297 297 297 297 297 293 293 122 35 458 208 208 441 109 151 151 151 169 150 54 238 238 310 107 60 298 298 298 379 471 471 49 342 89 89 446 67 34 145 443 154 178 96 96 342 105 105 321 354 386 386 386 469 116 94 418 418 418 418 418 418 99 436 436 436 60 298 298 298 379 379 471 471 471 49 9 142 221 196 70 65 428 428 428 146 325 325 34 253 253 453 9 142 133 364 276 109 109 139 139 139 293 293 293 122 35 354 420 420 420 422 36 384 490 490 490 349 349 234 261 25 487 498 498 498 396 313 285 131 34 89 116 33 394 465 377 351 139 139 139 175 58 451 30 30 378 43 141 141 141 31 162 232 68 68 115 273 470 171 171 252 173 402 402 26 359 166 166 301 8 354 354 180 376 376 460 178 178 458 192 415 415 314 472 221 458 208 79 288 288 360 360 434 200 248 248 212 445 445 171 171 171 171 252 215 354 100 100 302 375 375 185 185 269 390 390 18 112 427 56 56 312 312 312 312 292 292 292 292 292 12 12 12 12 12 12 12 12 260 260 260 260 163 163 163 163 163 163 491 316 316 491 316 316 73 289 321 320 287 287 287 111 111 111 85 438 203 53 394 478 162 232 68 115 273 106 499 499 306 396 337 337 464 464 464 111 111 378 88 345 141 281 31 342 26 251 251 241 
431 403 171 171 171 358 358 233 321 227 227 419 419 439 439 225 225 47 47 47 491 47 80 80 80 289 451 451 451 30 30 422 162 232 172 115 179 179 120 120 314 457 310 310 338 338 395 499 499 265 265 85 146 146 37 359 359 474 474 474 19 454 454 417 414 170 170 170 47 28 28 2 2 2 491 491 2 2 2 2 491 316 491 316 435 435 289 435 321 144 27 351 319 319 53 255 255 255 251 241 431 235 235 235 235 413 413 98 48 13 13 13 170 321 170 312 187 187 292 292 292 292 23 23 23 23 23 101 101 149 149 228 289 321 321 127 5 5 455 72 72 441 153 153 153 372 396 313 186 54 54 224 50 356 281 281 9 168 106 410 410 410 410 410 173 402 29 495 406 467 340 340 340 466 22 283 455 448 219 219 219 180 180 306 306 306 306 306 306 59 37 37 404 439 439 439 78 78 170 170 28 28 28 491 2 2 491 2 2 2 491 491 2 316 491 316 316 316 73 73 289 321 321 445 445 278 278 173 196 196 429 429 429 219 464 222 222 245 245 245 8 354 180 376 376 376 376 376 282 37 37 233 192 419 419 439 78 170 170 442 442 187 442 187 187 12 12 12 12 260 260 260 149 149 289 289 321 289 209 287 16 16 16 16 16 88 88 111 111 111 111 438 143 35 389 389 389 33 394 76 465 445 445 445 351 351 264 264 264 468 468 468 337 337 324 324 464 277 277 277 385 36 227 419 439 78 78 170 491 47 187 47 47 47 442 442 442 442 442 127 22 5 236 36 36 107 395 351 91 91 91 91 206 206 122 122 35 29 456 456 31 162 9 105 336 74 106 426 426 206 348 64 212 191 191 191 314 401 321 108 107 107 395 485 286 286 286 468 245 349 349 155 262 262 359 359 474 474 474 474 19 454 229 321 247 15 15 15 193 193 17 +17 17 17 363 51 51 228 289 321 188 177 177 177 325 356 356 356 342 342 224 242 242 116 131 131 72 72 110 443 443 240 173 280 41 41 41 41 19 454 417 417 417 417 170 47 491 47 491 491 491 47 47 80 321 321 435 435 435 209 111 111 111 202 202 402 402 458 27 180 405 405 206 167 457 14 14 14 209 411 297 297 297 297 297 297 297 293 399 70 70 46 46 46 46 46 438 378 43 364 109 109 498 498 134 387 122 122 26 26 359 81 166 324 416 239 458 144 180 484 278 240 314 77 270 342 224 340 340 340 94 199 277 277 277 277 227 419 229 247 247 126 126 326 326 101 101 149 391 80 80 80 80 289 321 354 159 159 159 325 34 177 177 325 356 356 356 31 342 224 242 242 379 131 131 72 72 110 443 443 443 173 173 280 41 41 41 41 19 19 454 454 454 78 170 170 491 312 312 292 292 292 292 292 21 21 21 21 21 21 408 408 149 149 228 228 289 321 209 209 83 55 55 322 322 94 199 118 118 118 118 118 205 177 177 177 177 325 356 356 356 342 342 242 242 116 64 131 472 221 144 445 445 351 351 264 486 468 468 468 337 337 337 324 252 325 34 89 340 116 33 394 212 465 395 395 151 151 169 150 86 86 6 272 34 44 38 162 68 172 115 273 498 498 498 396 240 35 242 242 242 116 250 250 364 364 109 109 403 403 403 207 171 3 252 216 198 22 5 455 72 72 72 72 294 294 294 294 330 64 64 212 302 302 302 497 122 129 259 74 441 441 424 424 497 497 497 49 342 168 180 180 113 113 113 113 450 167 167 131 427 321 247 126 126 326 326 326 101 408 408 149 391 491 321 373 66 68 68 115 273 84 16 88 88 111 111 111 111 438 438 438 35 259 354 180 443 443 285 300 382 313 313 143 458 458 445 445 213 213 213 252 215 354 277 277 277 277 143 259 259 354 420 420 143 458 144 351 494 253 368 453 168 106 111 111 111 438 438 10 10 479 331 84 84 496 274 216 198 448 448 448 464 154 154 154 416 32 96 368 453 453 115 470 470 486 486 376 460 460 178 35 96 96 401 196 196 309 309 479 331 486 486 460 460 178 458 192 192 69 223 130 280 277 277 277 277 385 385 75 227 419 439 78 170 47 47 47 491 491 491 2 2 491 491 316 316 316 73 289 321 321 209 177 177 177 356 356 342 168 44 116 199 154 154 96 96 54 
482 238 161 161 487 288 360 360 360 339 53 359 166 166 166 324 14 14 411 411 424 424 424 424 424 424 122 122 122 131 472 221 144 27 437 306 306 306 306 396 215 35 29 277 277 314 401 321 259 354 180 376 376 376 376 120 282 37 233 192 419 427 78 170 491 312 312 312 341 341 341 341 341 12 12 12 12 21 21 326 326 326 326 101 101 149 149 228 289 321 321 209 287 16 16 16 88 88 111 319 319 203 53 394 212 4 104 104 104 104 406 337 337 337 324 422 143 458 208 386 431 376 376 376 460 240 24 36 107 152 152 152 202 402 402 402 259 144 27 27 351 319 319 319 319 203 381 381 117 48 417 417 417 417 197 491 435 80 289 321 209 188 357 357 357 357 357 173 280 242 116 94 118 118 118 118 118 280 177 177 177 177 457 457 364 345 389 389 389 285 34 202 202 202 402 401 259 354 137 137 137 137 33 10 10 479 331 265 265 428 146 146 146 39 86 6 272 87 87 87 162 54 86 26 26 444 444 213 252 215 354 340 340 340 199 44 44 44 43 43 364 276 346 346 265 85 85 85 139 139 293 122 122 314 401 401 75 107 107 395 351 351 264 264 468 468 406 337 337 324 422 36 36 161 161 487 487 487 41 41 19 19 19 454 417 417 421 421 491 421 128 128 128 193 193 17 +17 17 17 296 317 491 317 317 491 184 184 184 184 320 7 345 152 152 152 152 402 221 144 180 189 405 206 167 36 377 87 87 236 161 79 499 499 499 428 146 173 173 280 29 255 251 251 241 235 235 235 235 235 348 248 76 259 74 74 351 213 213 213 213 213 186 39 342 342 224 110 110 110 202 202 202 430 430 430 430 430 243 133 259 345 109 41 41 19 19 454 229 82 229 312 312 126 292 292 292 292 292 292 21 21 21 21 408 408 408 149 149 321 321 320 473 258 258 31 342 224 494 494 31 162 232 105 105 336 470 470 432 330 330 379 77 342 224 300 300 382 186 186 54 172 273 470 470 120 240 325 177 177 177 378 345 141 141 281 342 168 470 411 171 171 171 171 252 314 401 196 196 217 70 65 265 265 265 85 85 85 139 139 375 375 185 269 433 427 427 247 247 126 126 126 326 326 23 23 23 23 23 101 149 149 149 321 321 287 111 111 111 438 356 203 64 90 212 144 208 386 431 376 376 376 376 85 37 24 35 259 354 420 420 422 143 144 27 351 368 453 168 106 111 111 111 438 438 251 251 251 241 266 266 266 266 173 402 402 221 75 161 161 79 499 499 428 85 146 146 173 173 176 176 176 328 200 200 117 404 404 439 439 225 237 237 260 260 260 260 260 391 391 289 321 321 321 209 287 287 16 16 16 16 16 88 88 177 177 177 177 35 478 478 68 172 172 444 444 444 360 339 339 394 478 478 232 232 68 172 344 344 344 344 344 274 43 43 43 364 364 276 174 319 319 348 348 348 64 64 212 212 300 469 134 349 155 262 262 100 100 497 122 45 45 45 325 111 111 111 203 53 90 212 144 106 88 319 135 135 248 465 377 87 87 87 251 251 251 251 241 278 278 278 173 402 402 345 333 220 220 164 219 477 477 477 88 89 89 446 53 212 354 354 255 251 251 251 251 241 431 235 235 235 235 235 348 248 248 465 449 377 123 123 123 219 219 477 477 477 477 477 132 13 321 247 312 126 126 326 326 326 326 326 326 101 101 149 149 228 321 412 287 111 111 111 438 202 402 6 479 463 463 463 280 29 382 245 8 354 354 134 497 251 241 431 235 235 235 235 348 76 465 108 123 123 123 88 109 475 475 94 475 475 475 301 8 354 106 493 493 240 325 41 41 41 19 454 454 229 491 247 312 126 126 23 408 149 149 228 321 320 7 331 307 307 307 167 457 457 42 147 380 485 213 213 286 286 139 139 175 359 474 474 41 41 19 19 454 454 13 414 170 47 47 47 491 47 491 491 47 491 102 435 80 80 289 321 7 7 354 159 159 159 314 35 22 448 448 464 464 255 38 162 68 115 273 106 265 265 85 85 146 175 81 242 203 250 250 345 141 141 281 453 9 198 22 283 455 43 364 364 276 109 109 498 498 498 396 271 271 39 39 86 86 238 6 227 419 
439 78 56 56 28 491 28 491 2 491 2 341 341 12 12 21 21 23 101 101 101 149 149 228 321 287 287 111 111 202 202 202 280 29 106 350 350 350 175 466 166 166 166 301 8 137 137 137 137 94 199 340 340 340 94 199 277 277 385 457 393 205 155 155 332 148 148 148 372 372 245 399 399 217 70 65 319 319 319 319 379 379 243 77 270 433 433 112 427 247 247 126 126 23 408 408 391 228 321 320 320 159 159 159 159 129 259 127 114 92 92 92 92 457 457 141 141 141 281 342 168 168 340 340 116 10 479 331 331 230 230 230 169 169 169 352 352 352 352 352 352 112 112 78 56 421 421 491 15 15 15 193 193 193 17 +17 17 17 363 363 363 51 51 51 228 491 321 321 209 177 177 177 177 356 77 342 142 397 336 345 109 498 498 498 313 186 39 342 68 198 114 114 242 446 116 457 335 401 321 226 321 209 475 475 475 475 475 475 475 475 422 349 164 214 214 214 214 200 248 219 152 152 152 143 458 192 389 389 34 121 121 399 217 217 217 217 473 65 486 486 486 460 460 460 24 310 107 107 242 275 275 275 303 303 117 404 13 78 170 170 491 491 312 187 187 187 187 12 12 12 12 12 408 408 149 149 228 321 320 7 473 258 258 258 31 342 224 494 494 31 162 232 232 105 105 336 470 432 432 330 379 64 77 77 224 300 300 382 186 186 54 273 470 470 240 34 177 177 378 345 141 141 281 9 142 397 364 364 109 109 278 143 458 192 192 469 325 34 223 130 402 196 70 429 429 429 422 108 377 87 87 236 259 108 119 119 437 405 405 405 405 206 178 35 321 26 386 266 266 266 266 266 178 458 96 321 127 114 92 92 92 92 92 167 385 427 82 247 126 126 326 326 326 193 193 193 +17 17 17 296 317 491 184 184 184 184 289 321 320 127 0 0 0 0 378 354 347 347 347 245 416 129 321 144 180 484 484 484 484 120 37 37 37 24 24 404 414 414 414 47 47 47 47 491 47 80 80 321 321 289 7 219 152 152 152 116 94 331 84 84 84 84 16 274 98 229 247 247 126 326 326 326 326 101 149 228 321 321 320 22 448 464 255 38 162 342 115 273 106 265 265 85 85 146 175 175 81 242 203 394 76 259 74 485 213 213 213 252 215 259 354 100 100 100 497 98 98 98 13 417 417 170 170 170 170 28 491 28 2 2 2 2 2 2 2 2 2 2 2 2 491 316 316 316 73 289 321 289 321 159 159 159 159 35 127 114 0 222 406 467 356 356 281 162 232 232 68 172 115 344 344 344 344 274 274 251 241 431 278 285 285 302 497 497 186 162 162 232 482 482 482 105 336 144 180 496 496 274 215 457 96 393 155 155 332 332 216 216 448 448 448 464 121 121 399 217 217 65 65 486 460 240 240 310 449 107 242 242 116 33 10 10 10 309 331 418 418 418 418 418 252 99 99 436 436 60 60 298 298 116 199 199 340 340 116 94 199 242 466 94 199 459 44 38 31 162 68 68 115 273 273 265 265 85 85 146 146 175 175 81 81 275 203 203 381 381 48 13 229 321 247 312 126 292 292 292 292 292 292 21 21 23 23 23 23 23 23 260 260 260 260 260 391 391 228 321 321 412 287 287 350 350 350 350 350 250 81 166 166 166 422 36 310 395 395 151 151 150 39 86 238 272 34 340 340 116 466 22 448 448 464 464 493 493 493 300 300 382 245 14 14 411 411 153 153 372 372 372 396 349 349 234 234 25 242 275 275 379 379 471 471 49 269 433 390 390 112 112 56 56 56 305 170 28 28 28 491 28 491 28 362 491 362 491 362 362 362 362 40 40 362 362 362 305 362 362 491 218 218 40 40 40 40 435 435 211 21 326 326 408 408 408 149 228 321 177 177 177 177 378 364 345 141 141 141 141 281 453 9 142 336 74 190 487 104 278 325 34 324 324 464 464 121 121 121 64 161 161 487 469 186 54 86 6 272 176 176 328 200 248 76 465 377 87 123 255 255 399 217 473 65 486 486 460 460 240 310 449 242 242 116 394 76 465 214 214 214 328 200 248 49 453 342 168 255 8 354 180 113 113 113 113 167 167 35 198 198 114 114 114 57 57 120 282 203 381 381 381 117 48 229 321 247 193 193 
17 +17 17 363 363 51 228 373 489 489 489 489 88 88 254 254 254 314 8 354 137 137 137 33 394 478 478 482 482 482 6 272 371 189 189 424 424 497 122 34 34 242 116 285 199 255 43 43 109 109 403 403 171 301 349 205 155 165 165 165 53 58 156 156 156 156 245 129 129 321 74 74 351 351 351 264 264 468 468 406 11 11 379 379 77 77 342 224 340 340 94 199 156 156 156 245 14 14 411 411 188 121 121 121 53 394 76 205 261 25 469 11 379 379 77 342 342 224 41 41 41 301 143 259 354 62 62 62 62 464 464 44 44 44 129 321 458 208 208 190 190 441 487 487 153 424 424 182 182 497 497 497 497 122 10 10 479 331 498 498 498 498 498 396 271 186 39 323 323 142 489 489 489 489 422 32 239 321 384 371 180 265 265 265 265 85 85 146 24 35 259 354 255 255 349 155 155 148 148 148 387 186 99 400 400 400 30 143 458 144 389 389 314 90 458 144 121 121 203 394 76 4 205 261 25 470 443 443 443 169 271 150 39 433 433 433 160 112 427 56 247 312 312 312 292 292 292 292 292 292 292 292 21 21 21 21 21 21 21 23 23 260 260 260 260 260 260 391 149 228 321 321 412 287 287 111 111 438 219 219 219 485 485 374 186 162 54 86 238 272 272 494 139 175 251 241 431 265 265 85 146 146 464 464 255 43 43 109 109 403 171 171 143 192 192 469 314 314 196 196 479 331 428 428 428 428 146 385 35 75 342 224 89 446 94 199 255 255 217 217 473 65 486 486 460 460 368 310 449 60 242 116 116 394 76 259 214 214 214 214 200 200 471 49 453 26 26 241 266 266 266 266 266 266 416 96 198 198 114 92 92 92 92 92 92 167 385 233 131 229 247 247 126 126 326 326 326 326 326 408 408 149 149 228 491 289 321 321 354 420 420 143 458 192 485 494 368 342 168 111 111 111 240 325 371 371 278 278 116 33 33 58 72 110 110 202 202 202 402 402 36 119 119 103 103 103 103 85 299 299 203 53 473 340 340 466 22 283 455 236 384 371 93 93 93 93 207 207 207 19 454 263 417 417 417 417 417 170 170 28 491 28 491 491 2 491 2 491 2 2 2 163 316 491 435 435 435 435 321 321 321 435 287 111 111 438 438 458 445 357 357 443 271 31 342 342 198 114 92 92 169 77 342 142 397 345 346 181 428 438 464 464 365 330 203 394 478 172 115 273 344 344 344 274 349 164 164 164 470 278 278 120 330 388 195 195 117 48 417 417 417 170 47 47 47 47 47 47 491 491 47 491 80 80 80 321 435 435 287 287 111 111 111 438 464 365 365 365 330 203 53 64 212 161 79 288 151 240 314 131 393 262 262 100 497 497 349 164 224 470 432 365 330 94 199 331 145 290 290 434 434 339 212 131 180 284 265 265 85 85 207 207 454 454 229 321 247 312 312 126 292 292 292 292 292 292 292 292 292 292 21 21 21 21 21 21 21 21 21 21 21 21 101 101 149 149 228 321 321 320 127 0 0 222 468 356 356 356 453 342 242 116 199 44 44 44 129 35 401 401 401 321 74 351 351 278 278 178 458 192 180 125 125 125 348 250 70 46 46 46 46 46 438 301 8 239 354 106 106 84 496 496 496 496 413 413 413 471 471 49 269 433 390 160 112 56 417 201 201 201 201 193 193 17 +17 17 17 296 317 317 184 184 184 289 209 287 287 111 111 111 438 438 314 32 239 384 371 371 374 374 132 274 251 251 241 431 266 266 266 266 173 402 402 36 108 87 87 88 88 255 255 399 217 217 65 65 486 486 460 460 240 310 107 395 242 275 116 199 199 111 111 85 438 203 203 53 10 10 309 331 331 265 265 428 428 146 146 186 39 342 68 68 224 224 11 116 33 394 472 401 401 321 74 425 425 386 431 319 319 319 203 53 53 53 76 401 259 345 333 333 220 220 402 472 221 239 384 371 278 278 53 53 394 76 259 74 302 302 497 497 49 453 342 168 340 340 116 250 70 46 46 46 46 438 464 464 145 139 139 293 293 122 8 354 354 84 84 496 496 274 185 39 433 433 390 160 112 112 56 56 56 56 28 28 491 491 28 28 491 491 362 491 362 362 362 491 362 362 362 362 362 362 
491 362 362 211 211 362 491 369 369 369 369 369 369 369 369 21 21 21 21 21 21 21 260 260 260 260 260 260 391 391 391 491 289 321 321 7 7 345 333 333 220 220 314 32 4 4 127 114 258 258 258 258 258 31 39 342 433 390 390 390 160 160 160 160 97 97 225 225 80 80 80 321 321 7 217 473 329 329 329 329 329 164 164 485 485 485 485 374 368 31 142 221 336 27 121 399 53 76 465 74 351 351 365 365 365 330 388 64 219 398 398 275 275 116 471 471 478 66 482 238 6 272 106 405 405 405 167 215 96 96 75 108 119 437 405 405 405 405 206 206 178 458 192 176 176 328 328 200 303 48 48 417 225 80 80 491 321 80 289 289 320 74 437 437 306 306 396 396 396 35 35 26 359 359 474 474 474 474 19 19 454 229 321 247 126 126 326 326 326 326 101 101 408 391 228 321 289 321 320 354 420 422 143 144 27 494 278 186 99 400 400 400 378 378 141 141 141 281 168 106 113 113 113 206 240 285 34 462 462 402 401 259 259 354 380 380 288 443 120 120 169 169 169 169 352 352 352 352 352 352 97 89 55 322 67 90 90 259 74 74 437 437 306 306 306 396 396 385 35 26 359 359 474 474 324 301 301 354 420 420 420 143 321 144 27 351 253 253 368 453 342 198 198 127 0 0 0 0 58 110 254 254 254 131 133 147 147 288 213 213 143 233 310 107 447 447 447 198 198 22 283 455 8 354 354 329 151 151 416 416 41 41 41 41 19 19 454 454 170 170 491 491 312 491 312 312 292 292 292 326 326 326 326 326 101 101 101 149 149 228 321 320 479 331 307 307 61 285 34 44 44 44 94 199 493 493 493 493 216 300 300 382 245 43 364 364 276 109 498 498 498 498 396 313 313 314 36 430 430 430 430 36 36 310 107 400 400 400 30 422 162 162 68 68 115 273 470 403 403 207 207 464 464 89 319 348 64 76 465 108 139 139 139 139 497 122 216 0 0 0 0 58 254 254 254 314 26 251 241 431 443 443 443 169 402 402 96 75 472 198 198 22 283 455 4 4 4 280 278 278 278 175 175 81 459 469 37 37 24 310 107 395 89 89 322 322 250 250 347 347 347 236 129 161 79 79 499 499 265 85 85 146 146 173 173 176 176 135 135 200 248 248 384 371 180 315 315 315 450 450 413 94 199 44 44 38 342 342 68 482 6 336 384 371 213 213 213 213 252 215 129 321 26 26 26 81 278 278 285 26 302 497 497 58 58 183 72 351 278 278 278 139 139 375 375 375 375 98 98 13 229 321 247 15 15 193 193 193 17 +17 17 17 363 363 363 51 51 228 321 321 320 5 5 455 42 42 147 380 380 496 496 496 274 274 122 24 131 472 221 259 74 437 306 306 306 306 169 167 36 449 69 223 130 130 402 402 345 345 109 407 407 407 385 36 75 310 395 254 254 254 314 259 354 137 137 137 33 394 394 465 144 27 351 189 189 151 167 167 457 478 478 66 68 68 115 344 344 344 344 344 344 274 236 32 239 384 371 213 213 213 213 215 129 321 354 359 359 474 474 464 340 340 116 394 465 377 377 123 123 198 22 283 455 38 162 232 482 172 115 273 106 405 405 405 405 206 169 402 402 402 6 272 472 472 482 482 482 115 273 106 153 153 387 387 387 139 139 302 302 375 375 98 13 321 247 247 126 126 326 326 326 326 101 149 149 228 321 321 127 45 45 45 35 259 127 5 5 455 129 129 259 354 354 180 486 365 365 365 365 360 200 200 243 243 233 270 270 270 390 390 390 390 97 97 225 225 225 373 155 261 487 288 288 278 330 339 64 310 447 447 238 272 397 345 333 220 220 402 221 129 401 321 354 425 425 241 431 374 374 374 203 53 53 473 176 176 135 328 200 200 248 248 248 364 276 276 346 346 265 428 85 85 139 293 293 293 122 122 401 401 401 310 310 107 107 395 351 470 264 264 468 468 468 337 337 324 422 36 36 161 161 487 487 288 41 41 246 318 49 453 342 168 89 116 116 33 394 394 478 66 68 68 68 26 26 251 241 81 278 278 278 203 53 250 250 250 250 250 276 346 346 428 428 428 146 252 36 472 221 401 401 321 321 354 354 498 498 498 498 396 
143 36 310 107 107 395 50 50 50 50 185 185 433 433 160 112 427 247 247 126 126 292 326 326 101 101 149 149 228 289 321 320 347 347 347 186 162 232 232 68 172 115 273 204 204 204 204 280 29 495 134 302 302 497 497 349 349 234 234 261 25 213 213 213 252 252 449 34 255 255 8 259 354 180 230 230 319 173 402 402 198 127 222 222 222 222 222 313 58 72 72 110 110 120 120 120 120 120 37 37 24 471 270 270 433 433 160 18 112 112 56 56 491 28 28 28 491 28 491 28 491 362 362 491 362 362 362 491 491 362 491 491 211 211 102 491 369 369 369 369 21 21 21 21 21 101 101 149 149 321 321 320 127 5 5 236 129 36 310 107 395 351 91 91 91 91 85 85 85 85 139 293 293 122 122 131 472 221 401 321 74 441 189 189 240 285 34 180 113 113 113 113 167 285 449 449 156 156 156 313 58 58 72 72 294 294 294 294 294 294 294 282 388 64 64 212 34 89 89 322 53 212 32 259 354 380 380 189 496 496 274 143 458 192 180 230 230 230 169 169 352 29 44 44 245 8 32 321 354 190 380 288 365 365 365 365 330 388 64 76 76 310 107 107 395 462 462 462 402 402 133 276 276 346 346 486 315 315 315 139 450 293 122 122 131 472 221 401 321 75 74 425 425 386 386 386 431 319 319 319 203 203 381 381 381 381 381 117 198 198 127 45 45 45 236 401 401 401 321 354 190 380 499 151 151 169 169 99 447 447 238 6 272 34 255 416 192 180 432 432 330 379 77 77 342 342 198 22 283 283 38 162 342 115 273 265 265 265 85 146 146 325 34 69 130 130 198 22 283 455 8 259 354 106 151 151 151 416 416 192 41 41 41 41 19 19 454 454 229 321 247 312 15 15 15 15 15 15 15 193 193 193 193 17 +17 17 17 296 363 363 363 363 51 51 51 184 184 321 184 321 321 209 188 430 430 430 430 342 430 430 430 430 33 64 212 127 114 92 92 92 92 167 457 401 401 401 321 354 354 219 219 485 485 485 374 374 285 285 469 469 349 393 234 155 262 262 100 100 100 100 375 98 98 98 13 13 13 442 442 442 491 442 442 442 442 442 491 102 2 2 2 2 201 40 305 305 305 305 305 40 366 366 102 316 316 316 491 102 491 305 102 102 289 289 321 321 320 181 181 181 181 181 35 449 430 430 430 430 198 114 114 92 92 92 167 457 35 401 75 161 161 161 161 487 487 487 213 213 246 246 246 246 301 26 251 251 241 444 444 444 444 360 360 339 199 176 135 135 135 135 200 200 464 113 113 113 113 167 167 349 155 155 165 165 165 165 466 22 22 283 455 455 259 354 354 180 376 376 365 365 365 328 200 243 76 458 192 483 14 411 411 297 297 297 297 297 297 293 293 497 497 43 364 364 276 346 346 346 428 428 428 146 252 143 36 449 89 89 446 446 33 251 251 251 241 241 431 171 171 171 252 186 39 342 342 342 224 41 41 324 324 301 399 473 476 476 476 476 143 458 192 219 152 152 422 349 164 164 164 214 214 360 360 200 200 248 321 144 192 69 223 223 223 223 223 37 173 352 352 352 402 99 338 400 400 400 400 464 464 464 145 376 376 376 460 169 169 342 342 86 105 6 96 96 272 427 56 247 247 312 126 126 292 292 292 292 292 23 23 23 23 23 260 260 260 260 260 260 391 391 163 491 316 316 316 316 316 316 316 73 289 7 7 7 364 276 276 109 109 139 139 293 293 293 413 309 479 331 331 315 315 315 315 315 450 450 16 98 98 98 13 13 13 229 247 312 312 126 126 126 23 23 23 23 23 260 260 391 391 47 491 491 316 316 80 321 373 412 412 287 287 287 284 306 306 85 438 240 325 34 242 242 94 199 331 84 84 84 16 16 16 16 98 98 98 98 263 13 225 225 225 225 225 225 80 80 80 321 373 66 66 172 179 179 179 179 179 314 196 196 473 65 329 329 329 329 329 164 164 485 485 485 485 485 374 132 98 417 417 417 417 417 417 170 170 170 28 28 28 491 28 491 491 2 491 491 2 491 2 435 2 2 2 366 321 305 305 40 435 40 201 435 435 435 435 435 289 289 321 320 364 276 346 346 265 428 85 146 464 464 44 44 44 8 401 
401 321 354 190 190 380 380 499 265 265 265 85 85 146 146 252 325 449 34 255 130 130 402 402 401 321 321 208 208 441 441 441 153 153 153 372 372 396 396 271 186 54 433 433 390 112 427 56 247 247 126 126 326 326 326 326 326 326 326 101 101 101 149 149 228 321 412 44 44 44 8 129 129 259 190 190 190 190 79 380 380 499 499 265 265 265 85 85 85 146 146 146 24 131 335 14 14 226 321 209 411 287 297 297 297 297 297 297 297 293 293 175 175 81 81 340 340 116 33 33 90 250 250 364 364 276 346 346 346 428 428 428 146 358 358 36 131 472 397 345 333 333 220 220 216 44 44 44 251 251 251 251 251 251 241 241 431 266 266 266 266 173 173 173 402 402 402 26 359 359 81 81 324 324 324 301 339 399 217 217 217 217 217 473 65 278 278 31 162 342 86 86 86 6 6 272 41 324 324 324 301 4 4 4 280 470 470 403 171 171 171 171 464 139 139 302 375 375 375 98 263 13 78 78 170 491 421 15 15 193 193 17 +17 17 17 296 363 363 363 51 51 51 228 321 412 287 111 111 111 202 202 202 196 309 479 463 463 463 463 29 382 313 186 162 232 68 68 267 267 267 267 267 339 339 250 250 250 276 174 174 174 319 388 388 303 117 229 229 247 126 126 326 326 101 101 149 149 228 321 289 321 159 159 159 159 325 34 111 111 111 438 438 143 458 445 351 242 116 199 199 255 255 399 217 217 473 65 486 486 486 460 460 240 310 310 107 395 242 242 203 250 250 181 181 181 181 99 338 338 400 400 152 378 378 345 389 389 314 26 26 251 241 241 367 367 367 367 35 458 96 26 386 266 266 266 266 266 266 146 358 143 458 192 419 439 78 170 170 491 28 491 28 28 491 491 2 491 2 102 2 102 102 102 491 491 435 305 289 289 321 209 287 111 111 111 438 438 325 34 180 84 350 413 348 131 34 463 463 463 463 402 29 29 495 467 467 154 154 458 96 96 232 68 105 336 470 470 151 151 178 35 401 75 272 87 87 8 354 420 420 420 464 464 44 255 8 259 259 190 380 380 499 499 428 85 146 146 35 196 196 473 46 46 46 46 438 186 162 68 68 68 115 273 279 279 279 279 279 279 279 375 375 352 352 352 427 491 247 491 312 126 292 23 23 23 101 101 149 149 228 321 321 287 287 111 111 111 438 203 53 394 478 232 172 115 344 344 344 344 274 58 72 72 437 350 350 350 350 350 203 53 250 250 359 359 166 166 324 324 301 10 479 331 231 231 231 274 8 354 29 469 325 41 324 301 378 345 345 389 139 497 175 335 14 14 209 145 463 463 463 463 280 29 382 245 245 43 364 174 174 174 330 348 76 465 75 377 87 87 399 217 473 65 65 264 264 264 468 468 337 337 337 324 324 301 217 473 429 429 429 429 429 246 246 246 19 454 229 321 247 126 126 126 326 326 326 326 326 23 408 408 408 149 149 391 491 289 321 412 287 287 319 319 348 175 81 431 443 443 31 342 342 177 177 177 177 457 70 70 65 65 428 428 146 215 35 259 420 420 420 464 464 44 44 349 234 234 261 25 148 148 148 372 372 467 467 242 348 250 217 473 473 278 278 99 99 436 436 60 60 116 94 199 470 264 264 468 468 468 337 41 41 19 454 454 417 417 417 417 237 237 80 80 321 412 287 287 287 111 111 438 438 31 342 224 494 494 129 74 74 437 496 496 496 274 368 368 9 168 494 44 349 234 234 205 261 148 148 148 372 372 372 467 467 446 116 250 250 473 473 278 278 99 99 436 60 60 242 116 94 199 264 264 468 468 337 337 41 324 301 399 70 473 65 428 428 146 146 457 35 401 196 242 33 33 394 32 32 259 420 420 420 420 420 324 301 173 280 104 104 104 104 337 337 337 337 301 129 321 74 492 492 236 384 371 278 278 278 143 321 192 485 134 134 134 175 81 300 334 59 452 263 229 229 491 312 15 15 15 15 15 193 193 193 193 17 +17 17 17 296 363 363 51 51 51 184 184 184 289 321 320 354 159 159 240 285 34 111 111 111 111 438 236 35 75 371 371 374 374 132 132 88 58 72 72 496 496 496 496 215 35 96 26 34 45 45 45 31 
478 478 68 68 68 115 273 231 231 231 203 53 64 212 384 93 93 93 93 464 464 111 111 111 111 438 99 99 338 395 389 389 389 497 129 401 259 74 483 58 72 110 202 202 202 202 173 402 44 44 44 43 43 364 276 276 346 346 428 428 146 146 358 457 401 401 321 75 161 161 79 487 288 443 443 120 271 271 150 39 433 433 433 160 160 160 112 439 56 56 47 47 491 47 491 47 491 491 316 491 73 73 289 289 321 320 127 114 92 92 92 92 92 240 385 35 131 335 226 188 188 356 356 281 342 9 196 70 70 46 46 46 46 438 58 58 72 72 72 72 72 72 72 72 437 265 428 428 146 146 146 464 459 459 459 31 39 86 86 6 272 106 486 428 85 146 438 239 36 371 485 485 286 139 139 175 175 69 462 462 130 29 498 498 498 498 169 164 164 26 26 359 81 324 324 301 8 259 354 425 386 81 459 271 271 271 39 39 433 390 390 160 112 427 491 247 312 126 126 292 292 292 326 326 326 326 326 326 326 326 326 101 101 149 149 149 228 321 321 287 287 111 111 438 438 143 36 107 395 494 31 342 342 26 26 26 241 266 266 266 266 173 402 401 401 401 259 74 190 487 278 278 325 34 324 324 143 458 321 208 208 208 386 386 431 496 496 496 496 274 274 216 164 270 270 433 160 18 427 56 56 47 47 491 47 47 491 2 2 491 316 316 321 73 289 321 435 209 83 55 55 322 322 212 34 111 111 111 111 438 202 202 402 196 479 331 463 463 463 29 29 382 58 58 72 110 110 254 240 325 34 44 44 129 129 259 74 190 487 278 278 325 324 324 324 236 239 259 161 79 487 288 443 443 169 342 342 224 340 340 340 250 217 70 46 46 46 46 46 438 251 251 241 431 431 428 428 428 146 146 186 402 352 342 224 45 45 325 325 111 111 111 178 458 192 242 242 116 33 250 456 456 456 456 456 399 217 473 65 432 432 330 203 53 212 212 29 334 334 59 59 452 263 229 321 321 312 312 126 292 292 1 292 292 1 1 1 1 1 23 260 260 408 408 391 391 391 289 289 321 320 159 159 159 285 255 255 402 402 458 144 441 441 441 153 153 372 372 396 186 186 54 54 86 112 427 56 56 201 201 201 201 201 201 201 201 201 201 201 321 435 435 435 320 209 177 177 177 356 356 342 483 14 226 321 411 297 297 297 297 297 297 293 122 216 22 283 455 399 399 138 138 138 138 372 396 313 449 377 87 87 87 251 241 367 367 367 367 458 96 393 393 234 234 261 25 148 148 148 372 245 43 345 109 109 313 236 36 75 108 377 485 489 378 88 356 356 356 281 342 430 242 242 116 212 131 277 277 277 277 277 385 233 75 419 427 56 56 170 170 312 312 292 292 292 1 1 1 408 408 408 408 305 321 209 83 55 55 322 67 466 127 361 361 361 361 361 388 195 117 229 229 247 126 126 193 193 17 +17 17 363 51 51 228 321 209 111 111 111 438 458 192 192 242 116 199 255 255 399 217 473 65 486 486 460 240 240 35 310 107 242 298 116 379 466 45 45 45 285 34 111 111 365 203 203 394 212 161 79 487 288 443 169 150 39 86 86 238 6 336 90 221 321 144 208 153 153 153 387 372 396 313 24 310 107 459 459 271 39 433 68 68 68 359 474 474 474 474 19 19 454 454 417 442 442 170 170 28 28 491 2 491 491 2 491 2 491 2 2 491 316 316 73 289 321 321 7 127 258 258 31 162 342 142 142 196 217 70 65 153 387 387 396 348 94 176 176 328 200 200 248 345 409 409 409 94 199 111 111 111 438 251 241 431 443 443 169 169 352 402 198 198 448 448 464 464 255 38 38 162 232 68 68 115 273 265 265 265 85 85 146 175 175 81 81 242 203 203 53 65 111 111 111 438 349 205 205 261 25 189 139 139 293 122 478 478 66 68 172 115 344 344 344 344 88 88 255 255 186 99 338 338 338 338 338 395 470 290 290 290 290 290 434 339 53 394 212 401 221 321 354 420 420 143 458 192 278 253 368 453 342 168 111 111 111 438 72 110 110 254 254 240 35 321 377 87 87 87 43 364 109 109 264 264 313 216 216 114 258 258 31 31 342 142 142 72 72 72 72 72 72 437 153 481 306 372 406 
467 467 469 240 285 34 106 106 424 424 424 424 497 122 122 133 401 321 364 276 109 278 330 348 33 394 77 77 342 224 41 324 324 301 236 321 75 161 79 79 288 288 443 120 271 271 39 39 433 390 160 160 112 427 491 247 312 126 292 292 292 292 326 326 326 326 326 23 23 23 101 101 101 101 101 149 149 228 289 289 321 289 209 209 287 297 297 297 297 297 293 293 216 22 448 448 448 378 106 153 372 372 372 349 349 205 261 25 242 379 379 471 77 342 110 110 110 460 240 314 35 384 87 87 43 43 276 109 109 468 468 240 216 216 57 57 203 217 473 219 219 152 374 116 94 331 84 84 84 84 16 274 98 98 13 13 414 491 170 491 170 187 491 187 187 23 23 101 101 149 149 149 321 209 44 44 44 399 217 70 473 65 498 498 396 313 35 310 107 107 242 116 116 199 34 89 446 116 33 58 58 72 437 496 496 496 496 215 35 35 96 270 342 224 242 242 116 33 466 466 241 431 376 376 376 169 150 150 86 238 272 397 397 109 109 278 278 64 76 449 300 382 313 236 239 259 384 371 84 496 496 413 94 199 158 158 158 252 325 449 191 191 191 314 36 164 119 161 161 487 487 487 337 213 213 324 324 3 3 58 72 72 437 319 319 319 348 64 212 300 382 313 313 314 314 219 219 219 180 180 106 306 306 306 306 306 396 396 37 37 24 77 270 168 168 462 462 402 402 402 345 109 109 330 116 33 394 77 77 224 41 41 324 236 108 377 123 123 216 283 448 448 464 464 255 38 162 68 115 115 106 265 265 265 85 146 299 175 175 81 275 203 203 381 117 48 13 491 491 312 312 126 292 292 292 292 292 21 21 21 23 23 23 260 408 391 391 391 321 321 373 66 68 115 273 231 231 231 319 53 76 76 74 485 213 213 301 8 354 100 497 497 186 162 68 115 273 470 443 240 325 177 177 177 457 345 141 141 281 9 221 336 354 420 420 143 259 144 27 351 368 368 342 224 30 30 422 143 144 27 389 389 389 314 196 242 242 33 394 478 232 68 172 115 273 443 443 139 175 175 175 81 277 277 37 385 131 404 321 247 247 126 15 15 193 193 193 17 +17 17 296 51 51 184 184 184 289 321 320 159 159 240 199 111 111 111 111 111 438 314 133 133 147 380 180 486 443 240 240 216 300 300 382 245 8 354 255 255 251 251 251 81 444 444 444 444 246 252 173 198 164 45 45 45 34 177 177 177 345 141 141 281 453 342 168 180 113 113 113 285 285 69 223 130 198 22 283 455 455 129 259 144 27 437 480 480 480 146 299 339 64 10 459 459 459 271 342 342 224 69 223 130 280 257 257 257 31 9 142 142 72 72 437 306 306 306 306 306 396 396 385 233 131 133 133 430 430 430 430 430 430 430 430 430 212 131 219 219 219 477 477 477 374 132 132 98 48 13 170 170 170 491 312 312 28 341 341 341 341 341 12 12 12 21 21 21 21 23 23 101 101 149 391 391 73 289 289 321 7 70 409 409 409 399 53 473 429 30 422 143 458 144 180 189 405 405 206 285 34 125 125 125 348 466 22 283 455 236 36 161 161 161 161 487 487 288 290 290 290 290 434 434 434 339 195 404 229 82 247 126 126 326 23 101 101 149 149 321 321 287 111 111 111 349 205 261 25 189 189 139 293 122 35 449 34 253 253 453 453 342 118 118 118 118 402 402 14 226 321 209 411 145 204 204 204 204 204 204 29 337 337 337 301 8 259 354 109 151 240 325 34 41 324 301 399 217 70 65 151 169 150 342 105 221 259 354 420 420 301 301 251 251 241 367 367 367 367 367 458 192 192 176 135 200 200 464 415 415 415 415 457 457 217 429 429 429 464 464 89 203 394 129 401 321 75 74 351 278 278 278 325 449 41 41 324 324 324 464 434 135 328 328 200 248 248 248 429 429 429 429 429 19 19 454 417 417 170 170 170 170 28 491 28 2 491 491 2 491 2 2 491 2 316 491 491 316 73 289 289 321 321 354 159 159 159 285 34 111 111 111 111 111 111 438 438 239 384 371 180 151 151 31 54 54 142 397 397 109 109 189 330 457 394 465 108 377 87 87 43 364 276 109 372 498 396 396 
178 458 192 89 340 94 199 255 255 399 217 473 65 486 486 486 460 240 240 36 310 107 242 275 275 116 195 466 45 45 45 45 325 34 111 111 111 111 438 438 58 72 72 72 110 110 110 110 254 254 240 285 34 106 125 125 125 125 466 22 283 455 399 70 65 496 496 496 186 186 238 6 6 472 221 401 401 47 47 491 491 47 80 491 80 401 321 354 354 485 485 219 219 219 485 485 374 374 374 132 132 132 285 449 469 469 469 349 349 155 262 262 100 100 100 497 497 122 129 401 401 401 401 321 75 74 74 437 351 351 290 290 171 171 171 171 171 139 139 139 139 497 497 497 497 122 32 32 32 401 401 321 354 425 425 425 241 431 431 374 374 374 374 132 132 132 132 186 162 232 232 232 68 68 172 115 273 278 278 139 139 293 122 458 458 96 472 221 401 401 75 161 79 487 288 443 443 169 271 150 39 433 433 433 433 160 427 247 247 247 126 126 126 326 326 326 326 326 326 326 326 326 326 101 101 101 149 228 228 321 321 321 354 420 420 422 143 458 192 485 278 368 453 9 397 345 409 409 409 67 219 219 152 152 152 152 14 14 411 411 284 284 284 353 353 353 396 406 467 467 255 255 255 399 217 473 65 486 486 365 460 240 310 107 107 447 242 94 199 176 176 328 200 248 248 219 152 152 152 378 399 70 65 428 428 428 146 143 449 34 253 253 9 142 397 336 109 109 139 139 175 175 81 255 255 217 217 65 486 486 460 240 240 36 310 107 242 242 116 394 478 66 342 224 231 231 231 76 76 198 214 214 214 328 200 248 250 364 276 109 498 498 396 169 164 164 133 364 276 276 346 346 346 265 85 85 85 355 355 375 375 375 98 229 229 321 247 15 15 15 15 15 193 193 193 193 17 +17 17 17 296 363 363 363 363 51 51 51 51 228 321 321 83 55 55 322 67 131 44 44 44 236 32 401 401 401 401 401 401 321 354 278 278 278 278 278 360 252 416 458 192 445 183 72 72 72 72 110 110 486 486 486 460 460 240 35 131 483 226 226 226 321 411 287 297 297 297 297 297 297 297 293 293 293 122 349 349 234 234 234 234 261 425 425 386 386 431 486 315 315 315 450 88 372 372 304 304 304 368 269 342 342 89 89 446 446 33 10 10 309 479 331 331 284 405 405 206 240 325 176 176 328 200 200 248 248 76 259 74 74 425 425 425 386 386 431 374 374 374 374 374 434 203 381 471 471 49 433 433 97 427 247 247 126 326 326 326 101 149 228 321 83 55 55 322 67 34 44 44 236 129 259 144 27 424 424 424 424 424 424 424 424 424 497 122 122 131 133 133 364 276 346 346 346 405 405 206 206 169 35 36 107 107 395 89 89 446 33 394 90 90 401 401 401 321 75 445 445 445 351 278 278 240 314 90 401 401 321 144 208 425 386 431 431 431 266 266 173 173 402 270 270 342 224 89 89 446 33 33 394 32 32 401 401 401 354 354 374 374 374 374 132 233 385 233 270 270 270 390 390 390 18 112 56 56 56 47 47 491 47 47 47 491 491 435 435 321 435 435 435 287 287 111 111 111 438 349 205 205 261 25 189 139 139 293 167 457 401 321 75 310 107 107 395 286 286 468 468 313 313 285 34 230 230 230 230 215 35 402 133 147 147 147 499 499 428 428 146 146 325 34 255 255 43 364 109 109 403 403 403 207 19 454 229 321 247 126 126 326 326 326 326 101 101 149 149 228 412 83 55 55 55 322 212 34 111 111 111 111 438 121 121 339 394 212 107 180 106 153 387 387 146 252 314 196 217 46 46 46 438 438 129 36 161 161 487 288 278 173 402 96 36 272 377 123 123 216 22 448 448 448 464 464 145 265 265 85 146 146 175 175 81 242 116 212 133 133 333 333 220 220 335 14 14 226 226 321 209 297 297 297 297 297 297 297 293 399 399 70 46 46 46 46 46 438 438 399 217 70 65 265 265 428 428 428 146 358 358 233 36 227 419 419 439 417 417 170 170 170 491 28 28 491 28 442 28 442 362 491 362 102 491 362 491 362 362 102 362 362 491 362 491 362 218 218 491 218 102 369 369 369 369 369 21 21 21 101 101 149 149 
228 289 412 287 111 111 111 378 345 141 141 141 141 281 453 342 242 242 116 212 131 44 44 44 32 32 32 321 354 354 278 278 278 385 457 478 478 232 68 68 172 115 470 470 278 120 385 143 458 458 144 27 351 319 319 319 53 176 176 135 200 200 464 106 410 410 410 410 173 280 29 29 495 467 340 340 116 466 22 283 455 8 354 354 180 496 496 496 496 496 274 274 37 24 227 419 427 78 78 170 491 187 187 292 23 23 23 23 101 149 149 228 289 321 320 479 331 265 265 428 146 240 216 300 300 378 43 345 141 141 281 9 221 196 473 258 258 258 342 224 494 494 31 232 232 105 105 336 470 432 432 330 379 64 77 342 224 224 334 334 334 59 452 452 263 321 247 126 126 23 23 101 101 101 149 149 321 321 287 297 297 293 216 216 114 84 84 186 186 338 400 400 400 422 239 310 107 395 395 432 330 94 199 495 495 495 467 134 134 359 359 166 166 166 324 324 464 464 356 356 120 120 271 185 433 433 433 433 160 160 112 417 417 417 237 421 421 491 421 128 128 128 193 17 +17 17 17 296 296 363 363 51 51 51 321 321 373 338 400 400 400 422 422 162 68 115 470 470 443 240 314 35 310 107 400 400 30 422 58 110 110 254 254 254 240 35 242 242 242 33 457 465 108 119 437 103 103 103 146 299 203 53 64 212 377 87 416 416 445 180 278 443 385 385 77 478 66 232 68 172 115 273 470 278 278 120 178 458 458 192 225 225 225 7 276 346 346 405 206 206 35 310 107 135 135 135 248 212 384 87 87 38 342 68 172 267 267 267 267 301 301 216 45 45 45 325 111 111 111 438 438 239 384 371 278 278 116 242 33 90 393 393 234 261 25 106 481 481 481 293 293 175 14 14 410 410 410 410 410 280 29 29 245 245 8 354 153 153 153 153 372 372 372 37 24 404 439 229 491 247 312 312 187 292 292 292 292 292 21 21 21 21 23 101 408 408 149 321 321 373 400 400 400 30 422 162 162 68 115 470 470 120 240 240 314 310 338 338 400 400 400 30 301 10 10 309 479 331 463 463 463 463 29 382 313 186 186 162 54 115 273 106 481 405 481 293 216 216 283 283 455 236 401 401 321 354 213 213 213 252 325 34 69 223 130 402 196 429 429 429 429 422 393 155 332 332 245 129 321 74 190 190 380 499 499 486 481 481 293 175 175 81 176 328 200 200 255 255 8 8 180 113 113 113 113 113 450 450 167 385 75 227 419 439 78 78 170 491 28 491 491 341 341 12 12 12 21 21 21 21 21 21 101 101 149 391 228 491 289 321 321 320 159 159 159 285 34 118 118 118 118 261 177 177 177 131 90 259 144 445 351 351 443 443 240 215 35 96 96 272 156 156 382 349 205 155 165 165 165 165 53 394 212 212 354 420 420 360 360 360 135 135 200 200 248 248 478 66 68 68 68 115 267 267 267 213 422 186 162 68 68 68 115 273 470 278 278 178 143 458 192 177 177 77 77 342 168 44 44 399 217 217 70 65 498 498 498 396 186 162 54 172 224 41 41 324 464 464 111 111 438 438 239 75 371 371 278 278 314 35 401 259 74 190 190 499 499 499 405 450 293 293 175 175 81 356 356 281 453 430 430 430 430 64 465 34 277 277 277 277 385 385 233 321 419 427 491 491 312 312 312 187 187 12 12 12 12 12 21 21 326 408 408 149 149 321 321 209 55 55 322 322 199 111 111 111 378 43 364 174 174 319 348 325 34 191 191 36 87 87 87 162 68 68 172 267 267 267 267 464 464 464 204 204 204 204 29 29 337 469 164 164 214 214 214 200 248 114 45 177 43 345 141 141 281 86 238 6 377 87 87 8 354 420 420 420 422 162 162 68 68 68 68 267 267 267 267 267 434 339 339 199 125 125 125 125 348 466 114 114 92 92 92 167 457 401 321 354 354 496 496 496 496 274 37 24 131 427 321 247 126 326 326 326 326 326 326 326 101 149 228 289 321 321 354 420 420 422 143 144 27 180 253 368 453 168 111 111 111 438 236 325 371 278 278 278 314 242 242 242 457 10 10 10 309 331 331 84 84 84 274 43 43 109 109 181 216 216 300 300 300 406 467 111 
111 111 111 438 240 325 34 463 463 463 280 29 382 382 58 72 72 110 202 202 202 202 402 44 44 116 479 331 493 493 493 493 216 300 495 406 467 467 499 405 206 215 29 29 469 469 236 36 108 119 485 485 485 374 374 330 94 199 469 469 469 325 41 41 41 19 19 19 454 454 229 170 491 491 15 15 15 15 15 15 15 15 193 193 193 193 193 17 +17 17 17 296 363 363 363 51 51 51 51 491 491 184 491 184 289 321 321 287 287 287 284 284 284 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 98 98 98 225 98 98 225 225 225 373 164 164 289 321 164 127 127 114 0 0 264 468 468 406 406 467 467 406 467 467 467 44 44 251 251 251 251 251 241 241 431 431 284 284 405 405 206 206 167 457 457 196 70 70 70 138 138 138 138 372 313 313 236 401 401 401 401 75 310 107 107 107 395 395 395 395 264 468 468 406 337 337 324 324 422 143 36 161 161 487 487 41 41 324 318 49 9 9 483 14 321 411 297 297 297 297 297 297 297 175 175 81 242 340 203 53 53 394 76 401 321 354 425 425 425 241 431 431 374 374 374 374 374 374 132 132 132 413 203 381 381 381 404 229 229 247 312 312 126 292 292 292 292 292 292 292 23 23 23 23 23 23 23 101 408 391 491 491 491 289 289 321 127 114 258 258 258 258 31 342 342 342 483 14 14 321 411 287 284 265 265 85 85 85 146 146 139 175 175 175 81 242 275 116 64 212 131 356 356 356 281 342 198 198 22 283 455 455 129 129 401 321 354 425 425 386 431 374 374 374 374 132 399 53 473 324 324 324 464 459 459 459 31 39 86 86 6 272 472 336 321 74 74 425 425 386 386 343 343 343 343 358 358 358 39 39 433 68 172 115 97 225 287 111 111 438 143 36 107 395 494 31 31 342 26 26 26 251 241 241 266 266 266 173 173 29 277 277 325 285 106 106 297 297 297 293 293 42 42 147 147 380 288 443 443 240 325 325 41 41 324 324 3 464 89 322 94 199 111 111 111 356 203 53 478 478 232 232 68 68 115 344 344 344 344 344 274 274 32 401 401 321 208 208 386 386 431 431 376 376 376 240 285 34 111 111 203 203 53 394 90 465 144 180 84 496 88 176 135 135 248 36 377 87 87 87 251 251 241 431 278 278 173 173 402 183 183 286 286 286 286 264 59 59 452 263 263 417 414 170 170 491 170 28 491 28 28 491 2 491 2 2 491 491 435 435 289 289 321 320 305 287 287 111 111 202 202 280 106 297 297 297 297 88 109 109 469 368 31 342 142 142 72 110 498 498 498 498 313 240 314 198 45 45 45 457 129 259 259 74 190 487 432 330 330 64 64 77 342 342 168 145 329 329 240 131 133 345 345 109 313 285 335 14 14 145 284 265 85 146 146 139 175 81 81 275 275 116 133 133 345 141 141 281 453 342 198 22 283 455 129 129 321 74 190 190 487 487 278 278 325 324 324 464 459 459 459 31 86 86 221 336 321 74 425 386 343 343 343 343 252 186 39 342 342 340 340 340 340 466 22 283 455 43 43 276 109 109 498 498 498 139 139 375 375 375 122 122 131 427 229 247 15 15 193 193 17 +17 17 17 363 363 363 51 51 228 321 83 55 55 322 131 111 111 111 111 438 219 219 219 485 485 374 186 162 54 238 6 161 87 87 464 255 255 399 217 473 65 486 486 460 240 240 310 107 395 242 116 94 199 111 111 378 378 345 141 141 281 342 9 26 26 251 241 431 278 278 173 173 280 176 135 328 200 248 183 183 286 286 286 286 264 59 59 452 263 321 247 247 126 126 292 292 292 23 23 23 23 260 260 391 391 391 316 73 73 289 321 321 354 159 159 159 285 34 111 111 111 111 438 10 10 479 463 463 463 463 463 29 29 382 245 245 42 42 147 147 380 485 485 278 278 359 359 166 166 166 464 464 154 154 458 96 66 86 105 105 336 470 470 151 178 178 96 96 272 191 191 191 325 34 111 111 111 111 438 438 43 364 364 109 109 389 389 120 120 120 37 37 75 419 419 439 78 170 170 170 170 28 491 28 491 28 362 362 491 362 2 362 362 491 491 491 366 491 491 316 
316 435 289 321 321 177 177 177 143 36 77 86 86 221 336 384 490 490 490 251 251 251 241 431 428 428 428 428 146 35 35 393 393 155 262 262 100 100 497 43 364 409 409 409 409 33 219 219 152 222 222 406 467 467 467 255 203 217 473 65 486 486 460 460 240 310 107 395 395 469 116 94 418 418 418 418 418 99 436 436 60 298 298 379 471 49 9 142 336 144 27 351 319 319 203 53 90 76 321 161 161 161 487 487 374 374 132 88 88 356 356 356 453 342 430 430 116 64 212 131 277 277 277 277 277 385 385 75 419 427 229 491 312 312 126 292 292 292 1 1 1 1 21 21 21 21 101 408 149 149 228 321 320 320 159 159 159 35 35 198 127 124 124 124 124 124 368 9 142 397 42 147 147 380 288 443 443 240 240 314 131 133 133 147 147 380 496 496 496 274 368 77 270 9 353 353 353 186 186 232 482 172 115 344 344 344 344 274 349 349 234 234 234 234 261 25 319 319 319 240 94 199 41 41 41 41 19 19 19 454 454 414 170 170 312 312 187 187 187 292 292 292 23 23 23 101 101 149 149 228 321 321 320 345 409 409 409 399 473 429 30 301 143 465 144 180 189 189 240 285 94 34 340 340 116 64 76 377 377 123 123 216 216 22 283 455 236 401 321 75 161 161 487 487 288 290 290 290 434 339 199 199 415 457 457 186 338 338 338 395 499 499 306 306 206 293 175 81 81 469 457 457 36 75 108 119 351 315 315 315 315 450 450 413 413 94 199 340 340 466 22 283 455 455 42 42 147 380 288 443 443 240 314 131 133 133 364 147 380 380 496 496 496 274 274 24 77 270 142 221 336 420 420 420 416 445 445 210 210 210 460 330 388 76 384 87 87 87 349 234 261 261 386 431 431 376 376 460 460 169 169 99 436 447 447 221 336 74 74 311 311 311 311 311 311 311 460 169 150 150 342 86 6 272 427 82 247 126 326 326 326 326 101 101 101 149 149 228 321 287 287 111 111 111 438 438 145 145 376 376 460 460 169 150 150 86 238 6 196 196 217 473 258 258 342 342 224 494 494 31 162 68 105 105 336 354 470 432 330 379 64 77 77 224 300 382 382 245 43 345 181 181 181 167 457 217 217 473 476 476 476 252 314 259 22 57 57 203 53 250 250 147 380 288 288 120 120 240 385 131 229 247 126 193 193 17 +17 17 296 296 51 321 412 55 55 322 322 67 478 338 338 400 400 400 30 30 422 186 232 232 172 115 470 470 240 314 36 310 400 400 400 30 422 236 384 371 371 278 278 314 196 242 242 457 309 479 331 84 84 496 88 88 89 446 203 393 155 332 332 332 245 129 259 74 74 278 278 278 325 41 324 324 324 186 162 232 68 68 115 470 470 171 171 252 143 96 196 479 331 307 307 61 167 457 36 377 87 87 14 145 145 376 460 460 169 150 86 105 221 458 208 495 467 467 475 475 475 475 475 301 399 70 138 138 138 138 372 245 245 129 321 208 441 151 151 151 151 169 99 238 238 310 107 60 298 298 298 379 471 471 270 160 427 247 247 126 126 292 292 292 292 23 23 408 408 408 408 391 321 321 373 373 400 400 400 400 422 162 342 115 470 470 240 285 34 111 111 111 438 399 70 65 65 151 150 150 86 6 34 202 202 202 280 145 145 486 460 460 169 150 86 238 272 161 382 467 44 44 44 38 164 401 321 164 180 180 486 315 315 450 450 169 269 9 168 242 116 64 131 34 106 297 297 297 293 293 42 42 147 380 288 443 443 240 325 325 41 41 19 19 454 454 414 321 247 312 126 126 292 292 292 1 23 23 408 408 149 149 228 321 321 209 287 111 111 111 438 31 342 342 494 494 494 129 74 496 496 496 496 496 368 368 453 168 180 111 111 111 111 111 438 58 72 72 110 110 486 486 486 460 460 388 64 314 401 321 108 108 119 374 374 374 374 374 132 132 132 8 8 354 159 159 159 159 314 229 247 247 126 126 326 326 326 326 326 326 326 326 326 101 101 101 149 228 321 373 72 72 268 268 268 268 268 88 430 430 430 430 219 219 152 152 416 144 27 106 88 350 360 135 339 212 87 87 349 349 261 25 480 480 480 85 
299 299 299 64 212 384 180 180 486 113 240 285 285 255 255 8 354 180 113 113 113 113 167 167 164 164 164 214 214 214 200 200 471 49 342 9 118 118 118 280 30 30 30 422 239 371 180 84 350 350 413 285 34 145 145 460 460 169 150 86 142 221 336 208 441 151 151 151 151 169 99 447 238 6 310 60 60 298 298 275 379 471 471 471 49 269 433 160 160 112 112 56 56 170 28 28 491 2 2 2 2 289 321 373 373 326 326 326 326 326 326 101 149 149 228 321 321 83 55 322 322 399 250 181 181 181 181 181 35 401 401 401 401 321 384 371 180 71 71 71 71 368 368 453 342 86 221 196 196 473 476 476 476 143 143 401 401 198 22 283 455 455 42 147 380 380 288 496 496 496 274 37 77 77 323 142 397 336 147 147 380 288 120 120 120 37 24 24 419 439 439 78 417 417 170 170 170 170 28 28 28 28 491 362 491 491 362 362 491 362 491 362 491 362 362 491 40 40 40 40 40 40 40 366 366 366 366 316 316 249 7 7 7 7 7 7 7 7 7 7 7 7 364 364 276 276 109 109 84 443 139 139 139 293 293 413 122 309 479 331 331 315 315 315 315 315 450 450 450 16 16 293 98 98 13 13 13 13 13 78 491 170 312 312 126 23 23 23 23 260 260 391 163 316 491 316 289 373 225 225 225 442 287 287 287 287 287 287 111 111 111 438 438 24 325 34 84 242 116 479 331 84 84 84 84 16 16 16 16 375 98 98 98 263 13 13 13 78 47 47 47 491 47 80 80 321 80 373 66 66 172 179 179 179 179 314 196 196 70 65 329 329 460 460 169 349 352 25 485 485 485 485 485 374 132 98 98 13 417 417 417 170 421 421 491 421 421 491 128 128 128 491 128 128 128 128 128 193 193 17 +17 17 17 363 363 363 149 149 228 321 127 114 0 0 313 313 35 354 420 420 420 301 10 479 331 231 231 231 274 186 162 482 482 105 6 144 496 496 496 215 457 393 205 155 332 332 332 216 448 448 448 464 255 399 473 65 486 486 460 240 24 310 395 469 242 116 94 418 418 418 418 418 418 99 436 436 60 60 298 298 116 33 394 466 127 114 361 361 361 282 388 303 117 48 13 80 80 80 321 7 364 345 430 430 430 314 35 401 198 127 114 114 264 264 264 59 59 452 452 13 229 247 247 312 312 312 292 292 292 292 292 12 21 1 21 21 21 21 21 23 101 101 149 149 391 316 316 316 73 73 289 289 321 320 159 159 285 34 430 430 430 399 70 65 111 438 438 143 36 108 119 351 405 405 405 206 178 192 192 176 135 135 248 248 465 377 377 374 374 132 399 70 383 383 383 383 383 385 35 310 310 107 447 97 427 56 56 47 491 187 80 491 80 289 321 320 74 485 213 213 252 215 354 29 302 302 175 175 81 353 353 353 353 467 467 297 297 297 293 43 345 109 469 281 342 342 6 36 119 351 351 139 139 175 81 176 135 200 248 248 429 429 429 429 464 464 111 111 111 111 438 438 239 75 371 371 374 374 374 374 132 132 132 98 98 13 414 247 312 312 126 187 292 12 12 12 12 23 23 23 101 149 149 228 321 321 320 345 430 389 236 310 107 152 152 152 378 42 147 380 180 486 486 460 240 216 300 300 495 406 467 111 111 111 438 314 36 371 371 278 278 314 242 242 242 457 457 108 108 119 437 437 405 405 405 405 206 206 215 35 458 192 419 427 229 247 312 126 292 292 23 1 408 408 408 149 228 228 316 316 80 80 289 321 209 188 118 118 118 118 118 261 219 152 152 152 152 186 162 232 172 115 470 470 403 171 171 422 162 342 342 273 273 84 16 88 106 284 481 293 293 293 150 162 232 86 238 272 371 180 106 284 405 405 405 206 206 215 215 233 352 419 13 229 82 312 187 187 187 187 47 47 47 47 491 491 491 316 491 316 80 80 435 435 435 435 209 111 111 111 438 143 458 445 445 445 351 351 351 365 365 365 365 330 388 64 64 77 77 342 68 238 6 272 180 405 405 405 215 215 35 402 345 409 409 94 199 111 111 111 438 399 217 473 473 476 476 476 143 458 192 180 230 230 215 35 35 70 70 46 46 46 438 438 399 217 70 65 480 480 480 480 299 299 339 394 465 108 
377 123 123 123 88 277 277 277 277 385 131 34 106 297 297 297 293 216 114 114 84 84 88 88 177 177 177 143 36 77 342 86 238 6 336 371 490 490 490 349 349 261 469 469 469 458 144 27 100 100 100 375 375 122 122 227 227 419 427 56 56 491 312 312 312 12 12 12 12 12 12 12 12 12 12 260 260 260 260 260 491 163 163 163 366 491 366 491 316 491 366 366 491 40 40 40 40 316 289 321 321 289 7 7 217 217 473 486 486 486 460 460 460 169 164 164 164 164 219 219 219 485 477 477 374 374 132 132 98 13 417 417 417 417 237 421 421 128 128 193 17 +17 17 296 296 321 320 345 141 141 281 453 168 121 121 121 33 212 310 395 395 153 153 387 387 387 146 135 135 135 200 248 183 57 57 203 203 243 478 342 172 115 273 279 279 279 279 375 375 375 169 352 352 352 352 112 427 170 491 491 312 312 312 292 292 292 292 292 21 21 21 21 21 21 21 21 21 260 101 101 149 149 228 321 321 320 251 241 266 266 266 266 146 178 35 96 196 196 217 70 65 496 496 496 496 274 186 39 86 238 6 472 221 336 208 208 441 441 346 346 265 428 85 146 146 277 277 385 131 393 393 234 261 261 25 496 496 496 496 274 274 274 274 186 39 433 342 97 451 451 30 30 301 251 251 251 241 266 266 266 266 146 178 35 96 401 36 108 119 437 405 405 405 206 206 178 458 192 469 469 325 34 459 462 130 402 401 75 74 485 213 213 213 252 252 215 259 74 100 100 100 497 497 43 364 345 409 409 409 409 466 466 127 0 0 0 378 43 345 347 347 347 347 245 43 364 345 109 278 139 175 175 81 176 135 135 200 248 248 465 377 87 87 87 239 384 371 374 374 374 216 216 22 283 455 236 108 119 437 405 405 405 405 206 178 192 192 192 135 135 200 248 248 466 114 57 57 203 394 478 478 232 68 68 115 273 279 279 279 279 279 279 279 375 352 270 433 160 427 247 247 126 326 326 326 326 326 326 101 149 149 228 321 83 55 55 55 322 67 212 384 371 191 191 314 196 479 331 307 307 61 61 285 34 154 154 458 96 66 86 105 105 336 470 151 151 178 178 96 36 272 449 57 57 203 53 394 212 377 87 87 87 458 445 445 445 213 213 213 252 215 129 74 230 230 230 230 230 215 35 259 354 257 257 257 453 453 342 168 432 432 330 330 64 64 64 131 34 223 223 280 277 277 277 277 277 75 227 419 439 427 56 170 170 28 491 28 201 201 201 201 201 491 201 201 201 201 201 201 491 491 491 316 316 491 435 321 321 321 354 159 159 159 159 449 183 451 30 30 30 464 254 254 196 196 309 309 479 463 463 463 463 463 29 29 29 406 467 467 154 154 154 259 96 66 68 68 105 105 336 470 470 151 151 178 35 321 75 272 191 191 236 36 377 87 87 87 88 121 121 121 33 394 212 107 395 153 153 387 387 146 173 216 22 283 455 38 162 342 224 494 494 494 162 232 68 172 115 273 265 265 265 265 85 85 469 469 469 449 449 41 41 324 464 69 223 130 280 44 44 44 251 241 431 278 278 134 26 302 497 497 416 259 144 79 498 498 498 498 498 134 302 302 375 375 375 98 13 13 13 491 170 312 312 312 312 341 341 12 12 12 12 12 12 260 260 260 260 260 260 391 391 391 321 321 289 320 7 364 109 109 278 278 399 217 473 136 136 136 116 33 133 250 347 347 347 347 347 8 8 354 180 486 376 376 460 240 285 34 255 340 116 94 331 331 230 230 230 169 349 352 352 340 340 340 94 199 106 297 297 297 297 293 122 458 465 144 27 370 370 370 370 370 348 64 76 310 436 60 60 298 275 379 471 471 77 433 97 427 82 247 126 126 326 326 326 326 326 101 101 149 149 228 321 321 321 320 159 159 159 159 457 457 251 251 241 431 278 278 285 302 302 497 497 497 122 129 259 144 498 498 498 498 498 498 134 302 302 375 375 375 375 375 185 269 323 323 97 97 225 397 345 347 347 347 347 245 245 43 364 276 276 109 498 498 498 498 498 59 396 271 186 39 54 390 390 390 390 18 112 439 56 56 421 128 491 193 193 17 diff --git 
a/SpeechT5/SpeechLM/dataset/LibriSpeech/hidden_unit/train_sample100.tsv b/SpeechT5/SpeechLM/dataset/LibriSpeech/hidden_unit/train_sample100.tsv new file mode 100644 index 0000000000000000000000000000000000000000..77da1d382251d74e02d73cce8e2e35ec6e1f8f0c --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/hidden_unit/train_sample100.tsv @@ -0,0 +1,101 @@ +/LocalData/dataset/LibriSpeech +train-clean-100/103/1240/103-1240-0000.flac 225360 +train-clean-100/103/1240/103-1240-0001.flac 255120 +train-clean-100/103/1240/103-1240-0002.flac 223120 +train-clean-100/103/1240/103-1240-0003.flac 235360 +train-clean-100/103/1240/103-1240-0004.flac 200240 +train-clean-100/103/1240/103-1240-0005.flac 242800 +train-clean-100/103/1240/103-1240-0006.flac 153280 +train-clean-100/103/1240/103-1240-0007.flac 240560 +train-clean-100/103/1240/103-1240-0008.flac 246960 +train-clean-100/103/1240/103-1240-0009.flac 160480 +train-clean-100/103/1240/103-1240-0010.flac 236880 +train-clean-100/103/1240/103-1240-0011.flac 234480 +train-clean-100/103/1240/103-1240-0012.flac 243040 +train-clean-100/103/1240/103-1240-0013.flac 244160 +train-clean-100/103/1240/103-1240-0014.flac 223360 +train-clean-100/103/1240/103-1240-0015.flac 60960 +train-clean-100/103/1240/103-1240-0016.flac 250640 +train-clean-100/103/1240/103-1240-0017.flac 229040 +train-clean-100/103/1240/103-1240-0018.flac 185760 +train-clean-100/103/1240/103-1240-0019.flac 246480 +train-clean-100/103/1240/103-1240-0020.flac 214640 +train-clean-100/103/1240/103-1240-0021.flac 236960 +train-clean-100/103/1240/103-1240-0022.flac 262000 +train-clean-100/103/1240/103-1240-0023.flac 194400 +train-clean-100/103/1240/103-1240-0024.flac 244320 +train-clean-100/103/1240/103-1240-0025.flac 241920 +train-clean-100/103/1240/103-1240-0026.flac 133360 +train-clean-100/103/1240/103-1240-0027.flac 223440 +train-clean-100/103/1240/103-1240-0028.flac 250400 +train-clean-100/103/1240/103-1240-0029.flac 244320 +train-clean-100/103/1240/103-1240-0030.flac 232320 +train-clean-100/103/1240/103-1240-0031.flac 269760 +train-clean-100/103/1240/103-1240-0032.flac 236400 +train-clean-100/103/1240/103-1240-0033.flac 230640 +train-clean-100/103/1240/103-1240-0034.flac 246480 +train-clean-100/103/1240/103-1240-0035.flac 256720 +train-clean-100/103/1240/103-1240-0036.flac 200320 +train-clean-100/103/1240/103-1240-0037.flac 237040 +train-clean-100/103/1240/103-1240-0038.flac 114480 +train-clean-100/103/1240/103-1240-0039.flac 230800 +train-clean-100/103/1240/103-1240-0040.flac 234720 +train-clean-100/103/1240/103-1240-0041.flac 216160 +train-clean-100/103/1240/103-1240-0042.flac 249680 +train-clean-100/103/1240/103-1240-0043.flac 236160 +train-clean-100/103/1240/103-1240-0044.flac 262240 +train-clean-100/103/1240/103-1240-0045.flac 250800 +train-clean-100/103/1240/103-1240-0046.flac 222800 +train-clean-100/103/1240/103-1240-0047.flac 206320 +train-clean-100/103/1240/103-1240-0048.flac 236320 +train-clean-100/103/1240/103-1240-0049.flac 244560 +train-clean-100/103/1240/103-1240-0050.flac 224400 +train-clean-100/103/1240/103-1240-0051.flac 245760 +train-clean-100/103/1240/103-1240-0052.flac 236640 +train-clean-100/103/1240/103-1240-0053.flac 218640 +train-clean-100/103/1240/103-1240-0054.flac 261360 +train-clean-100/103/1240/103-1240-0055.flac 179920 +train-clean-100/103/1240/103-1240-0056.flac 229040 +train-clean-100/103/1240/103-1240-0057.flac 109680 +train-clean-100/103/1241/103-1241-0000.flac 255440 +train-clean-100/103/1241/103-1241-0001.flac 248800 
+train-clean-100/103/1241/103-1241-0002.flac 249040 +train-clean-100/103/1241/103-1241-0003.flac 222160 +train-clean-100/103/1241/103-1241-0004.flac 236080 +train-clean-100/103/1241/103-1241-0005.flac 224400 +train-clean-100/103/1241/103-1241-0006.flac 243760 +train-clean-100/103/1241/103-1241-0007.flac 242320 +train-clean-100/103/1241/103-1241-0008.flac 242160 +train-clean-100/103/1241/103-1241-0009.flac 222400 +train-clean-100/103/1241/103-1241-0010.flac 253920 +train-clean-100/103/1241/103-1241-0011.flac 231760 +train-clean-100/103/1241/103-1241-0012.flac 239680 +train-clean-100/103/1241/103-1241-0013.flac 236960 +train-clean-100/103/1241/103-1241-0014.flac 242080 +train-clean-100/103/1241/103-1241-0015.flac 224160 +train-clean-100/103/1241/103-1241-0016.flac 234640 +train-clean-100/103/1241/103-1241-0017.flac 254240 +train-clean-100/103/1241/103-1241-0018.flac 150960 +train-clean-100/103/1241/103-1241-0019.flac 48400 +train-clean-100/103/1241/103-1241-0020.flac 155360 +train-clean-100/103/1241/103-1241-0021.flac 242880 +train-clean-100/103/1241/103-1241-0022.flac 261600 +train-clean-100/103/1241/103-1241-0023.flac 266720 +train-clean-100/103/1241/103-1241-0024.flac 254240 +train-clean-100/103/1241/103-1241-0025.flac 77280 +train-clean-100/103/1241/103-1241-0026.flac 176080 +train-clean-100/103/1241/103-1241-0027.flac 238080 +train-clean-100/103/1241/103-1241-0028.flac 248880 +train-clean-100/103/1241/103-1241-0029.flac 244960 +train-clean-100/103/1241/103-1241-0030.flac 247520 +train-clean-100/103/1241/103-1241-0031.flac 209600 +train-clean-100/103/1241/103-1241-0032.flac 224080 +train-clean-100/103/1241/103-1241-0033.flac 251920 +train-clean-100/103/1241/103-1241-0034.flac 270560 +train-clean-100/103/1241/103-1241-0035.flac 248800 +train-clean-100/103/1241/103-1241-0036.flac 249040 +train-clean-100/103/1241/103-1241-0037.flac 204400 +train-clean-100/103/1241/103-1241-0038.flac 238960 +train-clean-100/103/1241/103-1241-0039.flac 258160 +train-clean-100/103/1241/103-1241-0040.flac 220560 +train-clean-100/103/1241/103-1241-0041.flac 252240 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/phone_unit/dict.phn.txt b/SpeechT5/SpeechLM/dataset/LibriSpeech/phone_unit/dict.phn.txt new file mode 100644 index 0000000000000000000000000000000000000000..47b7a03cc4b736752fd0ee578a56fafcb0e242b3 --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/phone_unit/dict.phn.txt @@ -0,0 +1,364 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 
149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/phone_unit/train_sample100.phn b/SpeechT5/SpeechLM/dataset/LibriSpeech/phone_unit/train_sample100.phn new file mode 100644 index 0000000000000000000000000000000000000000..6550e52f92d0b35fa7bdc3b0676046893840b7ef --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/phone_unit/train_sample100.phn @@ -0,0 +1,100 @@ +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 111 111 111 111 111 111 111 111 111 111 111 111 37 37 37 37 37 37 37 273 273 273 273 273 273 289 289 289 289 289 144 144 144 144 144 144 144 331 331 331 331 331 331 331 331 331 331 53 53 53 53 53 53 53 53 53 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 281 189 189 189 189 340 340 340 340 340 340 340 340 340 340 340 275 275 275 275 275 275 165 165 165 165 165 165 165 165 165 113 113 113 113 113 113 113 113 49 49 49 49 49 49 224 224 224 223 223 223 223 223 223 223 223 223 223 193 193 193 193 193 193 193 193 193 233 233 233 233 233 233 233 233 233 116 116 116 116 116 116 116 116 116 1 1 1 187 187 187 187 187 187 187 187 187 340 340 340 340 340 279 279 279 279 279 279 279 49 49 49 49 49 273 273 273 273 273 273 273 273 277 277 277 277 277 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 341 341 341 341 341 341 341 341 341 341 341 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 281 189 189 189 189 189 189 189 189 340 340 340 340 340 340 340 340 340 340 275 275 275 275 275 275 165 165 165 165 165 165 165 165 113 113 113 113 113 113 113 49 49 49 49 49 49 224 224 224 223 223 223 223 223 223 223 223 223 223 223 223 193 193 193 193 193 193 193 193 193 193 193 233 233 233 233 233 233 233 233 233 233 233 233 116 116 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 223 223 223 223 223 223 223 223 223 223 193 193 193 193 193 193 329 329 329 329 329 329 116 116 116 1 1 1 1 1 1 1 1 215 215 215 215 215 215 215 215 215 215 215 215 53 53 53 53 53 53 53 53 53 281 281 281 281 281 281 281 281 281 281 288 288 288 288 288 288 288 331 331 331 331 133 133 133 133 133 133 276 276 276 276 276 276 119 119 119 119 119 204 204 204 204 204 204 204 204 204 204 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 329 329 329 329 329 49 49 49 49 233 233 233 233 233 233 233 233 233 233 225 225 225 225 225 212 212 212 212 212 212 212 212 212 212 212 212 212 227 227 227 227 227 227 227 227 227 227 227 227 227 227 227 227 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 232 232 232 232 232 232 232 232 232 232 232 232 232 275 275 275 275 275 275 275 275 275 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 115 115 115 115 115 115 115 193 193 193 193 193 193 193 273 273 273 273 273 273 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 115 115 115 115 115 115 115 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 232 232 232 232 232 232 187 187 187 233 233 233 233 289 289 289 289 289 289 289 289 320 320 320 320 320 320 50 50 50 50 50 50 50 223 223 223 223 223 223 193 193 193 193 193 289 289 289 289 289 49 49 49 49 224 224 224 224 224 224 224 179 179 179 179 179 179 179 179 179 179 179 179 21 21 21 21 21 21 225 225 225 225 225 225 225 225 225 225 244 244 244 244 244 244 244 244 244 244 244 244 244 244 244 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 171 171 171 171 171 171 171 171 171 171 171 171 171 171 277 277 277 277 277 193 193 193 193 193 233 233 233 233 233 233 233 233 217 217 217 217 217 217 217 217 217 116 116 116 331 331 331 331 331 189 189 189 189 292 292 292 292 292 292 67 67 67 67 67 67 67 67 67 67 67 67 67 67 225 225 225 225 225 225 117 117 117 117 117 117 117 145 145 145 145 145 145 145 145 145 145 340 340 340 340 340 340 340 340 47 47 47 47 47 47 233 233 233 233 233 116 116 116 116 116 223 223 223 223 165 165 165 165 165 165 165 165 165 165 165 165 117 117 117 117 117 205 205 205 205 205 205 205 205 205 340 340 340 340 340 340 340 340 340 191 191 191 191 191 191 191 277 277 277 277 277 277 277 117 117 117 117 117 117 277 277 277 277 277 25 25 25 25 25 25 25 25 273 273 273 273 273 273 273 273 280 280 280 280 280 280 280 280 47 47 47 47 233 233 233 233 116 116 116 287 287 287 287 287 277 277 277 277 49 49 49 49 329 329 329 329 329 149 149 149 149 149 149 149 149 149 149 149 149 149 149 281 281 281 281 281 281 288 288 288 288 288 288 288 107 107 107 107 107 107 100 100 100 100 100 100 100 100 100 100 50 50 50 50 50 50 107 107 107 107 107 107 107 107 107 107 107 107 277 277 277 277 277 305 305 305 305 305 305 305 305 305 220 220 220 220 220 220 220 220 220 220 220 
220 220 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 49 49 49 288 288 288 288 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 116 116 116 116 116 191 191 191 191 191 191 289 289 289 289 289 289 280 280 280 280 279 279 279 279 279 279 279 279 279 279 279 279 69 69 69 69 69 69 69 69 69 277 277 277 277 277 277 277 277 277 280 280 280 280 280 280 280 280 280 47 47 47 47 47 47 333 333 333 333 333 333 333 333 333 333 333 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 107 107 107 107 107 107 107 107 107 107 37 37 37 37 37 37 37 37 37 220 220 220 220 220 220 220 220 220 220 187 187 187 187 232 232 232 232 232 119 119 119 52 52 52 52 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 305 305 305 305 305 117 117 117 117 117 117 340 340 340 340 340 47 47 47 328 328 328 328 119 119 119 119 119 204 204 204 204 204 204 204 204 247 247 247 247 247 247 247 247 247 225 225 225 225 225 116 116 116 116 116 116 116 219 219 219 219 219 219 219 219 219 219 53 53 53 53 53 53 53 293 293 293 293 293 293 293 293 293 293 109 109 109 109 109 145 145 145 145 145 145 145 145 145 288 288 288 288 288 288 271 271 271 271 271 271 271 271 271 225 225 225 225 165 165 165 165 165 165 165 165 165 165 165 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 187 187 187 187 288 288 288 288 288 288 331 331 331 331 49 49 49 49 49 340 340 340 340 340 340 340 275 275 275 275 189 189 189 189 273 273 273 273 273 273 273 273 337 337 337 337 321 321 321 321 321 321 321 289 289 289 289 289 189 189 189 189 189 189 189 116 116 116 116 287 287 287 188 188 188 188 107 107 107 107 107 107 208 208 208 208 208 208 208 208 208 47 47 47 232 232 232 232 232 232 232 232 232 191 191 191 191 191 191 191 233 233 233 233 233 233 233 233 289 289 289 289 289 289 289 277 277 277 49 49 49 49 221 221 221 221 221 221 221 49 49 49 49 49 49 49 49 49 288 288 288 288 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 133 133 133 133 133 133 117 117 117 117 117 117 117 117 117 117 225 225 225 225 225 225 73 73 73 73 73 73 73 73 73 73 236 236 236 236 236 236 236 236 236 236 236 236 236 107 107 107 107 107 277 277 277 277 277 277 305 305 305 305 305 305 305 220 220 220 220 220 220 220 220 220 220 187 187 187 187 232 232 232 232 187 187 187 289 289 289 289 289 280 280 280 280 280 1 1 1 147 147 147 147 147 147 147 147 147 147 147 147 147 225 225 225 225 225 205 205 205 205 205 144 144 144 144 144 144 144 144 219 219 219 219 219 219 219 219 219 219 219 219 69 69 69 69 69 69 69 69 277 277 277 277 277 277 280 280 280 280 280 280 280 280 291 291 291 291 291 291 277 277 277 277 320 320 320 320 119 119 119 119 119 249 249 249 249 249 249 249 249 249 249 249 249 340 340 340 340 340 340 340 340 340 340 340 331 331 331 331 331 331 331 331 305 305 305 305 305 305 305 117 117 117 117 117 117 117 117 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 193 193 193 193 292 292 292 292 292 292 292 292 292 292 292 292 292 292 115 115 115 115 115 115 115 115 21 21 21 21 21 21 21 21 21 277 277 277 277 277 277 277 277 220 220 220 220 220 220 220 220 220 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 209 209 209 209 209 209 209 221 221 221 221 221 221 221 277 277 277 277 277 189 
189 189 189 289 289 289 289 289 289 280 280 280 280 280 280 47 47 47 47 328 328 328 328 328 328 328 328 271 271 271 271 271 271 271 271 271 271 271 271 271 321 321 321 321 321 321 321 321 321 321 321 224 224 224 224 224 224 224 224 224 224 224 224 224 47 47 47 47 47 233 233 233 233 116 116 116 116 116 219 219 219 219 219 219 219 219 219 219 219 33 33 33 33 33 33 33 33 33 281 281 281 281 281 281 281 281 281 281 281 221 221 221 221 221 221 221 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 53 53 53 53 53 53 53 53 53 53 53 288 288 288 288 288 288 288 1 1 1 107 107 107 107 100 100 100 100 100 100 100 100 100 119 119 119 119 48 48 48 48 287 287 287 287 287 287 287 287 287 287 287 287 287 101 101 101 101 101 101 101 101 101 228 228 228 228 228 228 228 187 187 187 288 288 288 288 288 288 288 275 275 275 275 275 275 275 275 275 209 209 209 209 209 209 209 113 113 113 113 113 113 113 113 113 288 288 288 288 288 223 223 223 223 223 223 223 223 193 193 193 193 193 193 193 193 233 233 233 233 233 233 233 117 117 117 117 340 340 340 340 340 179 179 179 179 179 179 179 179 179 179 179 21 21 21 21 21 21 225 225 225 225 225 225 225 225 225 225 244 244 244 244 244 244 244 244 244 244 244 244 244 244 244 244 244 244 191 191 191 191 191 191 191 191 191 191 288 288 288 288 288 288 288 288 331 331 331 331 331 49 49 49 49 340 340 340 340 340 50 50 50 50 50 219 219 219 219 219 219 219 219 219 219 219 219 333 333 333 333 333 333 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 49 49 49 49 49 49 49 288 288 288 288 288 288 288 1 1 1 331 331 331 331 331 331 331 331 331 331 331 133 133 133 133 133 133 224 224 224 224 224 224 224 224 219 219 219 219 219 219 219 49 49 49 49 233 233 233 233 233 233 233 117 117 117 117 117 117 53 53 53 53 53 53 53 53 221 221 221 221 221 221 289 289 289 289 289 289 49 49 49 49 49 49 116 116 116 116 116 116 223 223 223 223 223 193 193 193 289 289 289 289 289 49 49 49 49 224 224 224 224 224 279 279 279 279 279 279 279 289 289 289 289 289 289 289 277 277 277 277 277 209 209 209 209 209 209 209 209 209 209 209 209 228 228 228 228 228 228 228 228 228 228 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 171 171 171 171 171 171 171 171 171 171 171 171 69 69 69 69 276 276 276 276 276 276 231 231 231 231 231 231 21 21 21 21 21 21 21 21 21 21 288 288 288 288 288 288 207 207 207 207 207 207 207 207 207 207 329 329 329 329 329 329 189 189 189 189 232 232 232 232 232 50 50 50 50 50 50 107 107 107 107 107 107 107 107 107 107 107 277 277 277 277 277 277 277 277 277 305 305 305 305 305 305 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 1 1 1 1 219 219 219 219 219 219 219 219 219 305 305 305 305 116 116 116 116 116 116 275 275 275 275 275 275 275 275 275 275 53 53 53 53 53 53 53 232 232 232 232 232 232 232 232 232 271 271 271 271 271 271 271 271 271 271 271 37 37 37 37 37 37 37 37 37 37 37 37 281 281 281 281 281 281 281 281 288 288 288 288 227 227 227 227 227 193 193 193 193 193 193 281 281 281 281 281 281 281 189 189 189 189 189 340 340 340 340 340 340 340 275 275 275 275 275 275 165 165 165 165 165 165 165 113 113 113 113 113 113 49 49 49 49 49 49 224 224 224 223 223 223 223 223 223 223 223 223 223 223 193 193 193 193 193 233 233 233 233 233 233 233 233 233 233 117 117 117 117 340 340 340 340 340 340 340 115 115 115 115 115 115 69 69 69 69 69 69 276 
276 276 276 276 276 331 331 331 331 331 331 331 189 189 189 121 121 121 121 121 85 85 85 85 85 85 85 85 85 85 85 85 85 288 288 288 288 288 288 115 115 115 115 115 320 320 320 320 320 320 320 320 320 320 320 275 275 275 275 275 275 275 189 189 189 189 189 177 177 177 177 177 177 177 21 21 21 21 21 21 21 21 21 21 21 277 277 277 277 277 277 277 116 116 116 116 116 116 171 171 171 171 171 171 171 171 144 144 144 144 144 144 115 115 115 115 115 115 115 115 115 115 209 209 209 209 209 209 209 209 209 281 281 281 281 281 281 281 281 49 49 49 49 233 233 233 233 233 233 233 233 233 233 233 281 281 281 281 281 281 281 281 281 281 281 204 204 204 204 204 204 204 204 204 204 204 204 204 204 35 35 35 35 35 35 35 233 233 233 233 116 116 116 116 115 115 115 189 189 189 189 221 221 221 221 221 221 221 221 221 221 221 69 69 69 69 69 69 69 69 69 69 277 277 277 277 277 277 277 277 277 49 49 49 228 228 228 228 228 228 228 228 228 228 228 228 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 187 187 187 187 288 288 288 288 288 288 271 271 271 271 271 271 271 277 277 277 277 277 277 21 21 21 21 109 109 109 109 109 109 49 49 49 49 49 49 109 109 109 109 109 225 225 225 225 225 225 204 204 204 204 204 204 204 204 331 331 331 331 331 331 49 49 49 340 340 340 340 340 340 340 219 219 219 219 219 219 219 219 219 219 219 219 21 21 21 21 21 21 21 21 233 233 233 233 233 233 233 233 285 285 285 285 285 285 285 285 49 49 49 49 49 49 49 49 49 49 280 280 280 280 280 280 280 280 280 119 119 119 119 119 49 49 49 49 49 288 288 288 288 288 288 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 281 189 189 189 189 189 189 340 340 340 340 340 340 340 340 340 275 275 275 275 275 275 275 275 165 165 165 165 165 165 165 165 165 165 113 113 113 113 113 113 113 49 49 49 49 49 49 49 224 224 224 224 224 224 224 224 224 224 224 331 331 331 331 331 331 49 49 49 340 340 340 340 340 340 279 279 279 279 279 279 279 279 193 193 193 193 289 289 289 289 189 189 189 189 189 189 236 236 236 236 236 236 236 236 236 35 35 35 35 288 288 288 288 288 179 179 179 179 179 179 179 179 144 144 144 144 144 144 144 331 331 331 331 331 331 331 193 193 193 193 233 233 233 233 233 233 233 233 117 117 117 117 117 244 244 244 244 244 244 244 244 244 244 244 244 244 244 244 244 244 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 219 219 219 219 219 219 219 219 219 219 209 209 209 209 209 209 209 273 273 273 273 273 273 273 189 189 189 236 236 236 236 236 236 236 236 50 50 50 50 50 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 21 21 21 21 21 21 21 21 277 277 277 277 277 277 272 272 272 272 272 272 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 19 19 19 19 19 19 19 19 19 19 232 232 232 232 232 232 232 232 232 131 131 131 131 131 131 131 131 329 329 329 329 329 329 329 329 329 277 277 277 277 277 205 205 205 205 205 205 293 293 293 293 293 293 293 293 293 197 197 197 197 236 236 236 236 236 236 236 236 119 119 119 49 49 49 49 288 288 288 288 288 271 271 271 271 271 271 271 271 271 271 271 271 271 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 281 281 281 281 281 281 281 281 281 281 281 281 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 171 171 171 171 171 171 171 171 145 145 145 145 145 145 145 228 228 228 228 228 107 107 107 107 277 277 277 277 305 305 305 305 305 305 221 221 221 221 221 221 221 280 280 280 280 280 280 47 47 47 47 47 233 233 233 
116 116 116 116 111 111 111 111 111 193 193 193 193 225 225 225 225 225 225 225 225 117 117 117 117 117 277 277 277 49 49 49 232 232 232 232 232 232 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 272 272 272 272 272 272 272 272 272 272 272 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 233 116 116 116 119 119 119 37 37 37 37 37 37 37 37 288 288 288 288 288 187 187 187 187 187 187 172 172 172 172 172 283 283 283 283 283 283 283 208 208 208 208 208 208 231 231 231 231 231 231 231 231 249 249 249 249 249 249 249 249 249 289 289 289 289 49 49 49 49 49 49 281 281 281 281 281 281 281 281 288 288 288 288 288 288 288 131 131 131 131 131 131 131 233 233 233 233 233 233 233 205 205 205 205 205 205 205 293 293 293 293 293 293 197 197 197 236 236 236 236 236 236 236 1 1 1 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 116 116 116 1 1 1 67 67 67 67 67 67 67 67 276 276 276 276 276 276 276 276 276 276 83 83 83 83 83 83 83 83 83 83 288 288 288 288 288 288 47 47 47 47 328 328 328 328 271 271 271 271 271 271 271 271 271 271 271 271 271 271 225 225 225 225 225 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 331 331 331 331 331 331 331 305 305 305 305 116 116 116 116 116 231 231 231 231 231 231 231 231 231 133 133 133 133 133 133 329 329 329 329 329 144 144 144 144 144 144 144 144 144 275 275 275 275 275 275 275 275 275 133 133 133 133 133 133 133 133 133 133 281 281 281 281 281 281 281 281 281 281 281 281 281 281 281 288 288 288 288 288 288 47 47 47 47 233 233 233 233 233 233 233 289 289 289 289 289 289 193 193 193 193 193 224 224 224 224 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 179 179 179 37 37 37 37 116 116 116 116 116 116 171 171 171 171 171 171 171 171 133 133 133 133 133 277 277 277 277 277 277 277 277 49 49 49 49 49 289 289 289 289 289 289 189 189 189 116 116 116 116 83 83 83 83 83 83 83 83 83 83 83 83 83 83 83 83 288 288 288 288 288 288 288 288 119 119 119 52 52 52 52 52 52 331 331 331 331 331 331 331 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 340 340 340 340 340 340 340 340 47 47 47 47 233 233 233 233 116 116 116 116 116 116 116 331 331 331 331 331 331 331 133 133 133 133 133 133 277 277 277 277 277 277 277 277 173 173 173 173 173 173 173 173 173 173 173 173 73 73 73 73 73 73 277 277 277 277 277 277 340 340 340 340 340 340 340 340 119 119 119 119 119 137 137 137 137 137 277 277 277 277 277 277 277 277 277 277 277 277 277 277 277 53 53 53 53 53 53 53 53 53 53 53 53 328 328 328 328 328 328 328 328 328 328 328 328 328 328 328 328 328 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 119 133 133 133 276 276 276 19 19 19 276 276 276 276 276 276 271 271 271 271 271 271 271 271 271 271 225 225 225 225 133 133 133 133 133 133 233 233 233 233 233 289 289 289 289 204 204 204 204 204 47 47 47 328 328 328 328 328 271 271 271 271 271 271 271 271 209 209 209 209 209 209 209 209 273 273 273 273 273 49 49 49 224 224 224 224 224 224 224 191 191 191 191 191 191 191 191 191 191 191 191 232 232 232 
232 232 232 232 35 35 35 35 35 35 35 35 35 35 35 329 329 329 329 329 49 49 49 49 49 49 233 233 233 233 233 233 233 233 225 225 225 225 225 212 212 212 212 212 212 212 212 212 212 212 212 212 212 212 212 212 212 212 35 35 35 35 35 35 35 35 35 35 233 233 233 233 116 116 116 116 116 116 116 116 116 116 83 83 83 83 83 83 83 83 83 83 83 83 288 288 288 288 288 47 47 47 47 328 328 328 328 328 328 328 187 187 187 187 187 187 187 187 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 320 320 320 320 219 219 219 219 219 219 219 49 49 49 49 232 232 232 232 232 47 47 47 47 47 289 289 289 289 289 289 289 289 289 289 289 289 133 133 133 133 133 133 133 233 233 233 233 233 116 116 116 116 116 116 116 116 116 219 219 219 219 219 219 219 219 219 219 225 225 225 225 225 249 249 249 249 249 249 249 249 281 281 281 281 281 281 281 281 281 281 281 281 225 225 225 225 204 204 204 204 204 204 287 287 287 287 287 287 188 188 188 188 119 119 119 133 133 133 276 276 276 276 276 276 276 231 231 231 231 231 231 165 165 165 165 165 165 165 165 165 165 109 109 109 109 109 109 109 145 145 145 145 145 340 340 340 340 340 340 340 340 340 340 107 107 107 107 107 107 193 193 193 193 193 341 341 341 341 341 341 341 341 341 233 233 233 233 233 49 49 49 49 49 49 49 49 280 280 280 280 280 280 280 280 280 107 107 107 107 107 107 107 100 100 100 100 100 100 100 100 100 100 100 115 115 115 115 115 115 115 115 115 193 193 193 193 193 233 233 233 233 233 233 233 288 288 288 288 288 47 47 47 47 328 328 328 328 328 231 231 231 231 189 189 189 189 177 177 177 177 177 177 177 225 225 225 225 133 133 133 133 133 133 221 221 221 221 221 221 221 289 289 289 289 189 189 189 236 236 236 236 236 236 236 119 119 119 119 133 133 133 133 276 276 276 276 276 276 276 276 276 276 276 276 276 247 247 247 247 247 247 247 247 247 247 247 247 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 53 53 53 53 53 53 53 288 288 288 288 288 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 281 189 189 189 189 189 340 340 340 340 340 340 340 340 340 275 275 275 275 275 275 165 165 165 165 165 165 165 165 113 113 113 113 113 113 49 49 49 49 49 49 49 224 224 224 223 223 223 223 223 223 223 223 223 223 223 223 193 193 193 193 193 193 193 193 233 233 233 233 233 233 233 116 116 116 116 116 331 331 331 331 331 331 49 49 49 49 340 340 340 340 340 340 331 331 331 331 331 331 53 53 53 53 232 232 232 232 47 47 47 47 47 328 328 328 119 119 119 119 119 249 249 249 249 249 249 249 249 249 249 340 340 340 340 340 340 340 340 340 219 219 219 219 219 219 219 219 219 219 219 219 165 165 165 165 165 165 165 165 165 165 273 273 273 49 49 49 49 49 109 109 109 109 109 109 109 49 49 49 49 224 224 224 224 224 224 224 224 219 219 219 219 219 219 219 219 219 219 219 219 219 277 277 277 277 277 277 209 209 209 209 209 209 209 113 113 113 113 113 113 113 113 113 113 113 113 113 145 145 145 145 145 145 145 145 340 340 340 340 340 340 340 340 179 179 179 179 320 320 320 219 219 219 219 219 219 219 49 49 49 49 49 232 232 232 227 227 227 227 227 227 227 227 227 37 37 37 37 37 37 37 233 233 233 233 233 49 49 49 49 49 49 216 216 216 216 216 216 216 216 216 216 216 216 119 119 119 133 133 133 133 276 276 276 276 276 276 276 276 247 247 247 247 247 247 247 247 247 247 247 247 247 232 232 232 232 232 219 219 219 219 219 219 219 49 49 49 49 233 233 233 233 233 233 281 281 281 281 281 281 281 281 
[Remainder of the added data file: many lines of space-separated integer ID sequences (long runs of repeated small IDs, one sequence per `+` diff line). The raw numeric dump is not reproduced here.]
120 120 120 120 120 120 120 331 331 331 331 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 101 101 101 101 225 225 225 225 225 225 225 225 225 225 116 116 116 275 275 275 275 275 275 275 275 249 249 249 249 249 249 249 249 249 340 340 340 340 340 340 340 340 340 340 107 107 107 107 305 305 305 285 285 285 285 285 285 285 285 285 285 285 285 49 49 49 49 49 49 49 49 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 191 191 191 191 191 191 191 289 289 289 289 280 280 280 280 280 280 280 231 231 231 231 231 231 248 248 248 248 248 248 248 248 248 248 248 331 331 331 331 331 331 331 331 53 53 53 53 233 233 233 233 233 233 117 117 117 117 117 144 144 144 144 144 144 144 227 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 337 337 337 337 337 316 316 316 316 316 316 316 316 316 47 47 47 233 233 233 116 116 116 227 227 227 17 17 17 277 277 277 277 277 277 277 277 277 277 193 193 193 193 225 225 225 225 225 225 225 225 225 48 48 48 48 19 19 19 19 19 19 19 19 276 276 276 276 276 276 276 107 107 107 107 107 107 107 107 107 107 107 107 107 107 107 249 249 249 249 249 249 249 249 249 249 292 292 292 292 292 292 292 50 50 50 50 50 223 223 223 223 193 193 193 289 289 289 289 289 289 49 49 49 49 224 224 224 224 224 224 224 224 224 224 224 224 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 116 116 116 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 223 223 223 223 223 223 223 223 223 223 223 223 193 193 193 193 193 193 193 329 329 329 189 189 189 189 236 236 236 236 236 236 236 47 47 47 47 47 47 333 333 333 333 333 333 333 333 333 333 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 107 107 107 107 107 107 37 37 37 37 37 37 37 37 37 37 37 220 220 220 220 220 220 220 220 220 179 179 179 179 179 179 179 209 209 209 209 209 209 209 276 276 276 276 276 276 107 107 107 107 107 107 100 100 100 100 100 100 100 100 100 119 119 119 119 49 49 49 229 229 229 229 229 229 281 281 281 281 281 281 281 281 281 133 133 133 133 133 133 225 225 225 225 225 225 225 225 225 225 225 225 329 329 329 329 329 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 287 287 287 287 287 287 287 287 287 287 287 277 277 277 277 277 209 209 209 209 209 209 209 209 209 209 209 340 340 340 340 340 340 19 19 19 19 19 19 277 277 277 233 233 233 233 288 288 288 288 288 227 227 227 227 227 227 53 53 53 53 53 53 112 112 112 112 112 112 112 112 219 219 219 219 219 219 219 219 219 219 53 53 53 53 229 229 229 229 229 273 273 273 273 49 49 49 49 233 233 233 233 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 119 248 248 248 248 248 248 248 248 248 248 115 115 115 115 115 115 193 193 193 193 193 193 193 193 193 276 276 276 276 276 231 231 231 231 231 231 231 231 231 249 249 249 249 249 249 249 249 249 249 249 249 249 340 340 340 340 340 340 340 340 340 191 191 191 191 172 172 172 172 172 172 119 119 119 119 119 164 164 164 164 164 164 164 164 164 164 164 331 331 331 331 331 331 331 331 331 331 331 331 148 148 148 148 148 148 148 148 148 148 119 119 119 119 119 133 133 133 277 277 277 277 277 277 277 116 116 116 116 107 107 107 107 204 204 204 204 204 187 187 187 187 187 187 233 233 233 233 233 233 53 53 53 53 53 172 172 172 172 172 172 172 47 47 47 47 328 328 328 328 328 328 
328 119 119 119 119 49 49 49 49 49 228 228 228 228 228 228 228 228 228 228 228 228 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 99 99 99 99 99 99 99 99 99 99 116 116 116 116 116 116 116 275 275 275 275 321 321 321 321 321 293 293 293 293 293 144 144 144 144 144 223 223 223 223 305 305 305 305 220 220 220 220 220 220 35 35 35 288 288 288 288 288 271 271 271 271 271 271 271 271 271 271 209 209 209 209 209 209 209 209 209 209 273 273 273 273 273 49 49 49 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 287 287 287 287 287 287 188 188 188 188 107 107 107 107 208 208 208 208 208 208 283 283 283 283 283 283 283 283 283 283 283 305 305 305 305 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 119 164 164 164 164 164 164 279 279 279 279 279 279 279 279 279 279 279 209 209 209 209 209 209 228 228 228 228 228 228 228 219 219 219 219 219 219 49 49 49 49 49 233 233 233 233 289 289 289 289 289 289 289 289 133 133 133 133 233 233 233 233 233 289 289 289 289 289 289 49 49 49 49 116 116 116 187 187 187 187 187 233 233 233 233 233 233 233 233 53 53 53 53 53 53 53 53 172 172 172 172 172 172 172 172 172 107 107 107 107 107 53 53 53 53 288 288 288 119 119 119 119 133 133 133 133 232 232 232 232 232 102 102 102 102 102 102 102 102 102 279 279 279 279 279 279 279 279 49 49 49 49 273 273 273 273 273 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 340 340 340 340 340 340 340 340 119 119 119 119 133 133 133 276 276 276 276 335 335 335 335 335 335 335 335 335 335 335 335 321 321 321 321 321 321 321 341 341 341 341 341 341 341 341 116 116 116 287 287 287 320 320 320 320 320 320 187 187 187 187 187 187 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 50 50 50 50 50 50 50 107 107 107 107 107 107 107 107 21 21 21 21 21 21 21 21 21 21 117 117 117 117 204 204 204 204 204 219 219 219 219 219 219 49 49 49 49 49 232 232 232 232 175 175 175 175 175 193 193 193 193 288 288 288 335 335 335 335 335 335 335 321 321 321 321 341 341 341 341 341 341 341 116 116 116 287 287 287 287 320 320 320 320 320 320 320 320 131 131 131 233 233 233 233 233 205 205 205 205 205 293 293 293 293 293 293 293 293 197 197 197 197 197 197 197 236 236 236 236 236 236 236 236 236 1 1 1 1 1 1 1 1 1 1 1 1 1 1 207 207 207 207 207 207 207 207 207 207 207 207 207 329 329 329 329 329 189 189 189 189 232 232 232 287 287 287 287 188 188 188 107 107 107 107 209 209 209 209 209 189 189 189 189 189 236 236 236 236 236 236 236 236 179 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 237 237 237 237 237 237 237 237 237 237 237 237 237 237 237 237 237 116 116 116 116 116 116 131 131 131 131 131 340 340 340 340 340 340 340 340 340 119 119 119 119 119 204 204 204 204 204 204 204 99 99 99 277 277 277 277 277 277 277 277 277 277 277 189 189 189 189 285 285 285 285 285 285 285 285 285 229 229 229 229 49 49 49 232 232 232 232 232 232 232 279 279 279 279 279 279 279 133 133 133 133 133 133 133 133 133 133 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 331 331 331 331 331 331 331 193 193 193 193 193 193 292 292 292 292 292 292 119 119 119 119 193 193 193 193 193 193 193 193 193 193 193 193 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 189 189 189 189 189 340 340 340 340 340 340 340 340 275 275 275 275 275 275 165 165 165 165 165 165 165 165 165 165 113 113 113 113 113 113 49 49 49 49 224 224 224 224 224 279 279 279 279 279 279 279 279 279 279 279 289 289 289 289 289 289 289 133 133 133 133 133 133 273 273 273 273 273 273 273 273 273 288 288 288 288 288 83 83 83 83 83 83 83 83 83 83 83 288 288 288 288 288 47 47 47 328 328 328 328 328 119 119 119 52 52 52 52 52 52 223 223 223 223 223 223 223 223 223 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 187 187 233 233 233 233 233 233 233 233 289 289 289 289 289 48 48 48 119 119 119 48 48 48 48 107 107 107 107 107 37 37 37 37 37 37 37 37 37 37 37 37 37 221 221 221 221 221 221 221 221 337 337 337 337 337 337 25 25 25 25 25 25 25 25 25 277 277 277 277 277 277 116 116 116 116 47 47 47 328 328 328 328 328 328 328 175 175 175 175 175 175 277 277 277 277 209 209 209 209 209 209 209 209 209 209 209 209 232 232 232 232 175 175 175 175 175 175 165 165 165 165 165 165 165 165 165 165 165 109 109 109 109 109 49 49 49 225 225 225 225 225 225 225 225 225 225 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 327 327 327 327 327 327 327 327 133 133 133 133 133 133 277 277 277 277 277 277 277 277 204 204 204 204 204 204 204 204 204 204 175 175 175 175 175 175 175 175 175 277 277 277 277 277 277 277 277 277 209 209 209 209 209 209 209 209 209 209 209 209 209 209 209 209 209 232 232 232 232 232 232 232 35 35 35 35 35 35 35 35 233 233 233 233 233 116 116 116 116 1 1 1 231 231 231 231 231 231 209 209 209 209 209 209 209 209 209 209 209 209 209 288 288 288 288 288 288 288 35 35 35 35 35 35 35 233 233 233 116 116 116 271 271 271 271 271 277 277 277 277 189 189 189 189 189 281 281 281 281 281 281 281 281 281 281 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 280 280 280 280 280 280 280 280 280 280 280 331 331 331 331 49 49 49 49 340 340 340 340 340 340 119 119 119 119 119 37 37 37 37 37 37 37 288 288 288 288 335 335 335 335 335 335 335 21 21 21 21 21 21 21 21 21 21 21 21 277 277 277 277 277 277 277 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 279 279 133 133 133 133 133 133 133 288 288 288 288 288 47 47 47 47 109 109 109 109 109 109 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 288 288 288 288 288 288 288 288 19 19 19 19 19 19 19 19 19 232 232 232 232 232 232 331 331 331 331 331 331 331 331 53 53 53 53 53 232 232 232 232 232 232 232 232 232 279 279 279 279 279 279 279 279 279 279 279 101 101 101 101 101 101 101 101 101 101 101 101 101 101 116 116 116 116 116 116 116 331 331 331 331 189 189 189 292 292 292 292 292 292 292 175 175 175 175 175 175 175 277 277 277 277 277 165 165 165 165 165 165 165 165 165 165 165 288 288 288 288 288 288 288 288 1 1 1 271 271 271 271 271 271 271 271 169 169 169 169 169 169 169 169 289 289 289 289 289 289 289 289 289 277 277 277 277 205 205 205 205 21 21 21 21 21 21 21 21 277 277 277 277 277 277 277 221 221 221 221 221 221 221 49 49 49 49 224 224 224 224 224 224 224 224 331 331 
331 331 331 331 331 331 193 193 193 193 225 225 225 225 225 225 225 225 225 253 253 253 253 253 253 253 253 253 253 253 253 253 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 47 47 47 47 47 47 47 47 47 47 47 47 47 47 233 233 233 116 116 116 119 119 119 204 204 204 204 204 204 204 204 204 204 204 204 51 51 51 51 51 51 51 121 121 121 121 121 121 144 144 144 144 144 144 144 144 144 144 331 331 331 331 331 189 189 189 292 292 292 292 292 292 292 292 271 271 271 271 271 271 271 271 271 271 277 277 277 277 277 193 193 193 193 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 223 223 223 223 223 21 21 21 21 21 21 21 21 229 229 229 229 229 109 109 109 109 109 17 17 17 277 277 277 277 117 117 117 117 205 205 205 205 205 205 205 205 205 205 205 205 205 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 231 231 231 231 231 231 231 231 21 21 21 21 21 21 288 288 288 288 288 50 50 50 50 50 50 50 279 279 279 279 279 279 279 279 279 279 279 279 289 289 289 289 289 289 289 277 277 277 277 277 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 279 279 279 279 279 279 279 279 279 279 279 279 279 279 289 289 289 289 289 289 289 289 193 193 193 193 193 220 220 220 220 220 220 220 220 220 220 220 220 220 231 231 231 231 231 231 69 69 69 69 69 69 276 276 276 276 276 276 276 279 279 279 279 279 279 279 279 279 279 279 289 289 289 289 289 289 289 249 249 249 249 249 249 249 249 249 249 249 249 249 249 232 232 232 232 232 232 331 331 331 331 331 331 49 49 49 49 340 340 340 340 340 340 287 287 287 287 48 48 48 107 107 107 107 208 208 208 208 208 208 208 208 279 279 279 279 279 279 279 279 279 279 279 279 209 209 209 209 209 209 209 209 209 209 209 209 209 209 209 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 171 171 171 171 171 171 171 171 171 69 69 69 69 276 276 276 276 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 189 189 189 189 340 340 340 340 340 340 340 340 340 275 275 275 275 275 275 165 165 165 165 165 165 165 165 165 113 113 113 113 113 113 49 49 49 49 49 49 224 224 224 224 224 224 224 224 224 224 224 224 224 331 331 331 331 331 331 305 305 305 305 116 116 116 179 179 179 179 179 179 179 37 37 37 37 37 328 328 328 328 279 279 279 279 279 279 279 279 279 279 279 279 209 209 209 209 209 209 209 232 232 232 232 191 191 191 191 191 191 191 191 288 288 288 288 288 288 191 191 191 191 191 172 172 172 172 172 172 172 172 172 119 119 119 119 119 119 119 133 133 133 133 133 276 276 276 276 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 116 116 116 116 116 116 116 116 107 107 107 107 107 107 49 49 49 49 49 49 49 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 271 271 271 271 271 271 271 277 277 277 277 277 101 101 101 101 101 101 101 101 101 101 101 329 329 329 329 329 49 49 49 49 289 289 289 289 289 289 225 225 225 225 225 225 225 225 204 204 204 204 204 204 204 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 331 331 331 331 331 49 49 49 49 340 340 340 340 340 340 340 340 47 47 47 47 47 328 328 328 328 328 119 119 119 119 204 204 204 204 204 204 204 47 47 47 47 47 273 273 273 273 273 273 273 273 273 273 273 273 193 193 193 193 233 233 233 233 233 337 337 337 337 337 337 337 49 49 
49 49 232 232 232 232 232 232 119 119 119 49 49 49 49 288 288 288 288 288 288 227 227 227 227 17 17 17 277 277 277 277 277 277 277 193 193 193 193 193 193 225 225 225 225 225 225 225 48 48 48 48 48 219 219 219 219 219 219 219 219 219 219 53 53 53 53 53 53 53 293 293 293 293 293 293 293 109 109 109 109 109 145 145 145 145 145 145 145 145 145 145 288 288 288 288 279 279 279 279 279 279 279 279 279 279 279 279 333 333 333 333 333 133 133 133 133 273 273 273 273 273 273 288 288 288 288 288 288 288 288 288 119 119 119 119 119 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 335 335 335 335 335 21 21 21 21 21 21 21 21 21 21 21 277 277 277 277 116 116 116 116 247 247 247 247 247 247 247 247 247 247 247 247 329 329 329 329 329 329 329 329 144 144 144 144 144 144 144 144 144 144 131 131 131 131 131 340 340 340 340 340 340 340 340 340 67 67 67 67 67 67 67 67 67 67 67 67 67 173 173 173 173 173 173 173 173 173 173 49 49 49 49 232 232 232 232 232 131 131 131 131 340 340 340 340 283 283 283 283 283 283 283 283 208 208 208 208 208 208 279 279 279 279 279 279 279 279 333 333 333 333 133 133 133 133 133 273 273 273 273 273 273 288 288 288 179 179 179 179 179 179 179 144 144 144 144 144 179 179 179 179 179 179 179 179 179 179 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 53 53 53 53 53 53 53 53 232 232 232 232 232 232 219 219 219 219 219 219 305 305 305 116 116 116 179 179 179 37 37 37 328 328 328 328 328 328 328 207 207 207 207 207 207 207 207 207 207 207 289 289 289 49 49 49 232 232 232 232 232 50 50 50 50 50 227 227 227 227 227 227 227 227 227 227 227 209 209 209 209 209 209 209 209 209 209 209 209 209 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 67 67 67 67 67 67 67 67 67 67 67 172 172 172 172 172 172 172 172 119 119 119 119 52 52 52 175 175 175 175 175 277 277 277 277 277 277 85 85 85 85 85 85 85 85 85 85 85 85 85 233 233 233 233 116 116 116 116 116 331 331 331 331 331 331 189 189 189 121 121 121 121 121 85 85 85 85 85 85 85 85 85 85 288 288 288 288 288 288 288 288 288 247 247 247 247 247 247 247 247 247 329 329 329 329 329 329 145 145 145 145 145 109 109 109 109 109 109 109 109 277 277 277 277 193 193 193 229 229 229 229 229 189 189 189 189 189 236 236 236 236 236 236 236 236 236 236 236 236 119 119 119 52 52 52 52 271 271 271 271 271 271 271 277 277 277 277 49 49 49 49 329 329 329 329 329 149 149 149 149 149 149 149 149 149 149 149 149 149 149 109 109 109 109 109 205 205 205 205 49 49 49 49 49 49 49 49 224 224 224 224 224 271 271 271 271 271 271 271 271 271 271 271 271 271 271 133 133 133 133 133 220 220 220 220 220 220 220 220 47 47 47 47 47 328 328 328 328 328 328 328 115 115 115 115 115 115 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 193 193 193 193 193 193 281 281 281 281 281 281 189 189 189 189 189 189 340 340 340 340 340 340 340 340 340 275 275 275 275 275 275 165 165 165 165 165 165 165 165 165 113 113 113 113 113 49 49 49 49 49 49 224 224 224 224 224 224 224 224 224 224 224 275 275 275 275 275 37 37 37 37 37 37 37 37 37 273 273 273 273 273 273 288 288 288 288 
279 279 279 279 279 279 279 279 279 279 279 279 229 229 229 229 229 229 229 21 21 21 21 277 277 277 277 277 277 277 289 289 289 289 289 289 225 225 225 225 225 204 204 204 204 204 204 35 35 35 35 35 288 288 288 119 119 119 119 204 204 204 204 219 219 219 219 219 219 219 219 193 193 193 193 193 113 113 113 113 113 113 113 113 113 49 49 49 49 232 232 232 232 232 232 232 232 115 115 115 115 115 69 69 69 69 69 69 69 69 69 69 69 276 276 276 276 276 276 276 276 276 276 276 276 276 1 1 1 47 47 47 47 47 47 47 47 47 47 233 233 233 116 116 116 116 116 279 279 279 279 279 279 279 279 279 279 289 289 289 289 289 289 133 133 133 133 133 133 133 133 273 273 273 273 273 273 288 288 288 288 288 191 191 191 191 232 232 232 232 232 232 232 232 232 331 331 331 331 193 193 193 193 193 232 232 232 232 232 232 232 232 107 107 107 107 107 193 193 193 193 193 117 117 117 117 189 189 189 189 232 232 232 232 232 232 232 287 287 287 287 287 287 188 188 188 188 115 115 115 115 115 115 115 320 320 320 320 320 320 320 320 279 279 279 279 279 279 279 279 279 279 279 279 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 52 52 52 52 52 219 219 219 219 219 219 219 219 219 219 193 193 193 193 193 193 113 113 113 113 113 113 113 113 113 49 49 49 49 49 49 232 232 232 232 232 232 35 35 35 35 288 288 288 288 288 288 288 175 175 175 175 175 175 277 277 277 277 209 209 209 209 209 209 209 209 209 232 232 232 232 232 232 175 175 175 175 175 175 165 165 165 165 165 165 165 165 165 165 165 165 165 109 109 109 109 49 49 49 49 225 225 225 225 225 225 225 225 225 225 225 340 340 340 340 340 340 340 331 331 331 49 49 49 340 340 340 340 340 340 340 340 50 50 50 50 50 111 111 111 111 111 111 111 111 111 111 111 193 193 193 193 277 277 277 277 277 173 173 173 173 173 173 49 49 49 49 224 224 224 224 224 47 47 47 47 273 273 273 273 273 273 273 21 21 21 21 21 21 21 277 277 277 277 277 277 277 289 289 289 289 229 229 229 49 49 49 233 233 233 233 233 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 276 276 276 276 276 276 276 276 276 276 331 331 331 331 331 331 331 331 331 331 305 305 305 305 116 116 116 116 179 179 179 37 37 37 328 328 328 328 328 328 107 107 107 107 107 107 193 193 193 193 193 232 232 232 232 232 232 232 232 232 232 232 111 111 111 111 111 111 111 111 111 111 193 193 193 193 277 277 277 277 277 277 277 277 173 173 173 173 173 173 173 173 173 49 49 49 49 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 187 187 187 172 172 172 172 172 172 172 191 191 191 191 288 288 288 288 288 179 179 179 37 37 37 116 116 116 116 231 231 231 231 231 21 21 21 21 21 21 21 21 21 21 21 21 288 288 288 288 288 288 107 107 107 107 193 193 193 193 232 232 232 232 232 232 232 232 279 279 279 279 279 279 279 279 279 279 279 248 248 248 248 248 248 271 271 271 271 271 271 271 271 271 271 271 271 271 271 271 165 165 165 165 165 165 233 233 233 233 233 233 233 233 233 173 173 173 173 173 173 49 49 49 49 225 225 225 225 225 204 204 204 204 204 204 204 204 204 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 225 225 225 225 225 225 209 209 209 209 209 209 209 209 209 209 209 209 209 209 209 232 232 232 232 232 232 131 131 131 131 131 340 340 340 340 340 340 340 340 340 340 287 287 287 287 287 188 188 188 188 175 175 175 175 175 175 175 175 
193 193 193 193 193 193 328 328 328 328 328 187 187 187 187 187 187 288 288 288 288 288 288 279 279 279 279 279 279 53 53 53 53 229 229 229 229 229 229 229 293 293 293 293 293 189 189 189 189 236 236 236 236 236 236 236 236 236 47 47 47 47 328 328 328 328 328 119 119 119 119 204 204 204 204 204 204 204 47 47 47 47 47 273 273 273 273 273 273 273 273 273 273 273 273 273 273 193 193 193 193 193 277 277 277 277 277 277 277 277 277 277 49 49 49 49 49 49 49 49 49 49 233 233 233 233 233 233 280 280 280 280 280 280 280 280 47 47 47 47 47 328 328 328 328 47 47 47 47 232 232 232 232 232 47 47 47 47 47 47 47 233 233 233 233 233 233 233 337 337 337 337 337 337 337 337 337 337 337 337 337 337 321 321 321 321 321 321 321 321 321 321 321 341 341 341 341 341 341 341 341 116 116 116 116 116 271 271 271 271 271 271 271 271 271 271 21 21 21 21 21 21 21 21 21 277 277 277 277 277 277 277 277 225 225 225 225 225 225 225 225 144 144 144 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 191 191 191 191 191 191 191 191 191 289 289 289 289 280 280 280 280 280 280 331 331 331 331 331 331 193 193 193 193 193 233 233 233 233 233 233 233 233 117 117 117 117 245 245 245 245 245 245 245 245 245 245 245 245 340 340 340 340 340 340 340 340 223 223 223 223 223 223 305 305 305 305 305 221 221 221 221 221 288 288 288 288 288 1 1 1 207 207 207 207 207 207 207 207 207 207 207 207 207 207 207 207 207 207 207 207 281 281 281 281 281 281 281 281 281 281 281 288 288 288 288 288 288 35 35 35 35 35 35 35 35 35 233 233 233 233 233 116 116 116 116 116 331 331 331 331 331 331 331 133 133 133 133 133 133 133 133 133 133 133 281 281 281 281 281 281 281 281 281 281 281 281 281 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 291 291 291 291 291 291 291 291 291 277 277 277 277 277 320 320 320 320 119 119 119 119 52 52 52 52 52 52 331 331 331 331 331 331 331 133 133 133 133 133 133 281 281 281 281 281 281 281 281 281 288 288 288 288 288 288 288 331 331 331 331 331 331 331 53 53 53 53 53 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 223 223 223 223 223 223 223 305 305 305 305 305 221 221 221 221 221 221 189 189 189 189 189 189 189 236 236 236 236 236 236 83 83 83 83 83 83 83 83 83 83 288 288 288 288 288 288 288 19 19 19 19 232 232 232 232 232 119 119 119 52 52 52 107 107 107 107 107 107 107 37 37 37 37 37 37 37 37 37 37 37 37 37 220 220 220 220 220 220 220 335 335 335 335 335 335 335 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 277 277 277 277 277 277 277 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 219 219 219 219 219 219 219 219 165 165 165 165 165 165 165 165 228 228 228 228 228 228 228 50 50 50 50 50 50 50 171 171 171 171 171 171 171 171 171 171 171 171 225 225 225 225 225 53 53 53 53 53 53 53 53 116 116 116 116 47 47 47 47 47 328 328 328 328 328 328 328 227 227 227 227 227 227 227 227 227 227 227 227 227 133 133 133 133 225 225 225 225 225 225 225 225 225 225 225 225 244 244 244 244 244 244 244 244 244 244 244 244 244 244 215 215 215 215 215 215 215 215 215 215 215 215 215 215 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 232 232 232 232 232 232 232 232 232 232 232 232 279 279 279 279 279 279 279 279 279 279 279 279 53 53 53 53 53 53 233 233 233 233 233 233 233 225 225 225 225 225 225 105 105 105 105 105 105 105 105 105 105 105 105 288 288 288 288 288 288 288 288 
288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 53 53 53 288 288 288 288 288 119 119 119 119 119 204 204 204 204 204 204 204 204 207 207 207 207 207 207 207 207 207 207 207 207 207 207 207 281 281 281 281 281 281 281 281 281 281 281 288 288 288 288 288 288 288 288 288 331 331 331 331 331 331 331 53 53 53 53 53 232 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 133 133 133 133 133 233 233 233 233 233 233 233 280 280 280 280 280 280 280 335 335 335 335 320 320 320 175 175 175 175 175 175 175 175 175 175 175 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 50 50 50 50 50 50 50 50 175 175 175 175 175 175 175 175 175 225 225 225 225 225 225 225 225 225 193 193 193 229 229 229 229 229 229 229 229 273 273 273 273 280 280 280 280 280 280 280 280 280 47 47 47 47 328 328 328 328 328 328 328 119 119 119 52 52 52 52 107 107 107 107 107 107 107 107 107 225 225 225 225 225 225 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 228 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 101 101 101 101 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 111 111 111 111 111 111 111 111 111 111 111 111 133 133 133 133 133 277 277 277 277 277 277 277 277 204 204 204 204 204 204 204 204 204 287 287 287 287 287 287 287 287 277 277 277 277 209 209 209 209 209 209 209 209 209 340 340 340 340 340 340 340 187 187 187 232 232 232 232 232 119 119 119 52 52 52 52 52 223 223 223 223 223 223 133 133 133 133 133 133 133 133 173 173 173 173 173 173 173 288 288 288 288 288 288 67 67 67 67 67 67 67 67 67 67 277 277 277 277 277 277 277 113 113 113 113 113 113 113 113 113 113 145 145 145 145 145 145 145 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 47 47 47 47 47 47 47 47 47 47 47 47 233 233 233 116 116 116 116 231 231 231 231 231 231 231 21 21 21 21 21 21 21 21 21 21 21 21 21 117 117 117 117 189 189 189 189 189 189 189 189 236 236 236 236 236 236 236 236 236 236 236 236 236 236 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 225 225 225 225 225 133 133 133 133 133 233 233 233 233 233 233 233 117 117 117 117 117 144 144 144 144 144 144 144 144 144 107 107 107 107 107 107 107 107 107 149 149 149 149 149 149 149 149 149 149 149 149 113 113 113 113 113 113 113 113 113 113 113 113 189 189 189 189 189 189 189 340 340 340 340 340 340 340 340 340 340 115 115 115 115 115 115 115 115 85 85 85 85 85 85 85 85 85 85 85 232 232 232 232 232 187 187 187 232 232 232 119 119 119 52 52 52 52 52 52 179 179 179 179 179 179 179 179 179 179 179 179 21 21 21 21 225 225 225 225 225 225 225 225 225 225 244 244 244 244 244 244 244 107 107 107 107 107 107 107 107 100 100 100 100 100 100 100 119 119 119 119 119 52 52 52 52 52 107 107 107 107 107 107 107 107 277 277 277 277 277 277 305 305 305 305 305 305 305 305 305 220 220 220 220 220 220 220 220 220 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 49 49 49 340 340 340 340 340 340 340 340 340 175 175 175 175 175 175 277 277 277 277 277 277 209 209 209 209 209 209 209 209 209 209 209 209 233 233 233 116 116 116 116 116 247 247 247 247 247 247 247 247 247 247 329 329 329 329 329 329 144 144 144 
144 144 144 144 107 107 107 107 107 107 107 100 100 100 100 100 100 100 100 100 100 100 100 50 50 50 50 50 287 287 287 287 287 287 287 287 287 287 287 287 287 287 287 287 37 37 37 37 237 237 237 237 237 237 237 237 177 177 177 49 49 49 49 224 224 224 224 224 224 47 47 47 47 47 47 328 328 328 328 327 327 327 327 327 327 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 233 233 233 233 233 233 233 233 233 233 233 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 209 209 209 209 209 209 276 276 276 276 276 276 276 276 279 279 279 279 279 279 279 279 279 279 279 279 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 227 227 227 227 227 227 17 17 17 17 277 277 277 193 193 193 193 193 193 225 225 225 225 225 225 48 48 48 48 48 219 219 219 219 219 219 219 219 219 219 219 53 53 53 53 53 53 53 293 293 293 293 293 293 293 293 293 109 109 109 109 109 109 145 145 145 145 145 145 145 145 145 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 133 133 133 133 232 232 232 232 283 283 283 283 283 283 283 283 208 208 208 208 208 208 208 208 279 279 279 279 279 279 279 37 37 37 37 37 37 37 37 288 288 288 288 288 35 35 35 35 288 288 288 288 288 288 288 67 67 67 67 67 67 67 67 67 67 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 67 67 67 67 67 67 67 67 67 67 67 225 225 225 225 225 225 225 225 225 333 333 333 333 333 333 333 333 333 333 333 205 205 205 205 205 340 340 340 340 279 279 279 279 279 279 279 279 279 279 279 225 225 225 225 225 101 101 101 101 101 101 101 289 289 289 289 289 289 225 225 225 225 225 204 204 204 204 204 204 115 115 115 115 115 115 189 189 189 189 189 281 281 281 281 281 281 281 289 289 289 289 289 289 277 277 277 53 53 53 53 53 53 281 281 281 281 281 281 289 289 289 173 173 173 49 49 49 49 224 224 224 224 224 47 47 47 328 328 328 279 279 279 279 279 279 279 279 279 53 53 53 53 53 233 233 233 233 233 233 233 233 285 285 285 285 285 285 285 285 105 105 105 105 105 105 105 105 105 105 105 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 116 179 179 179 179 179 179 179 179 179 209 209 209 209 209 209 209 276 276 276 276 276 276 283 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 208 279 279 279 279 279 279 279 279 279 279 279 279 37 37 37 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 231 231 231 231 231 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 231 231 231 231 231 231 231 231 231 231 231 231 231 231 193 193 193 193 193 289 289 289 289 289 289 289 189 189 189 189 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 119 119 119 52 52 52 52 287 287 287 287 287 287 287 287 165 165 165 165 165 165 165 165 165 165 165 109 109 109 49 49 49 224 224 224 224 224 107 107 107 107 107 189 189 189 189 181 181 181 181 181 181 181 181 181 181 181 181 101 101 101 101 101 101 101 233 233 233 116 116 116 179 179 179 179 179 179 144 144 144 144 144 
144 144 331 331 331 331 331 331 49 49 49 340 340 340 340 340 340 340 223 223 223 223 223 165 165 165 165 165 165 165 165 116 116 116 116 116 171 171 171 171 171 171 144 144 144 144 144 279 279 279 279 279 279 279 279 279 279 53 53 53 53 53 53 273 273 273 273 273 273 144 144 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 189 189 189 189 189 340 340 340 340 340 340 340 340 275 275 275 275 275 275 275 165 165 165 165 165 165 165 165 165 113 113 113 113 113 113 49 49 49 49 49 49 49 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 189 189 189 189 189 189 189 173 173 173 173 173 173 173 173 173 69 69 69 69 69 276 276 276 276 283 283 283 283 283 283 283 283 283 283 208 208 208 208 179 179 179 179 37 37 37 37 116 116 116 116 116 171 171 171 171 171 171 133 133 133 133 133 277 277 277 277 277 277 277 225 225 225 225 225 225 204 204 204 204 204 204 219 219 219 219 219 219 219 219 225 225 225 225 225 249 249 249 249 249 249 249 249 249 341 341 341 341 341 341 341 116 116 116 119 119 119 52 52 52 52 115 115 115 115 115 69 69 69 69 69 69 69 69 69 69 69 276 276 276 276 276 276 276 276 276 276 276 276 276 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 133 133 133 133 133 276 276 276 276 276 276 331 331 331 331 331 144 144 144 144 144 144 144 144 291 291 291 291 291 291 291 291 291 291 291 291 277 277 277 277 277 208 208 208 208 208 208 208 208 271 271 271 271 271 271 271 271 271 271 225 225 225 225 225 165 165 165 165 165 165 165 165 165 289 289 289 289 289 289 280 280 280 280 280 280 280 223 223 223 223 223 223 223 165 165 165 165 165 165 165 165 165 165 165 165 165 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 279 248 248 248 248 248 248 248 248 248 119 119 119 119 49 49 49 49 288 288 288 288 288 288 288 227 227 227 227 17 17 17 17 277 277 277 277 277 277 193 193 193 193 193 193 225 225 225 225 225 225 225 225 225 48 48 48 48 48 227 227 227 227 227 227 227 227 227 53 53 53 53 53 53 281 281 281 281 281 281 288 288 288 107 107 107 107 208 208 208 208 208 187 187 187 187 221 221 221 221 221 221 221 221 281 281 281 281 273 273 273 273 273 273 133 133 133 133 133 221 221 221 221 221 221 221 289 289 289 289 289 189 189 189 236 236 236 236 236 236 236 236 236 236 236 279 279 279 279 279 279 279 279 279 279 279 53 53 53 53 53 228 228 228 228 228 228 331 331 331 331 331 53 53 53 53 232 232 232 232 232 179 179 179 179 179 179 179 179 179 249 249 249 249 249 249 249 249 249 249 249 228 228 228 228 228 228 331 331 331 331 331 189 189 189 292 292 292 292 292 292 292 292 227 227 227 227 227 227 227 227 37 37 37 37 37 37 37 293 293 293 293 293 293 337 337 337 337 337 316 316 316 316 316 287 287 287 287 287 287 287 188 188 188 188 188 287 287 287 287 287 287 287 287 287 287 208 208 208 208 208 208 208 208 208 208 208 208 208 208 208 208 208 208 208 208 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 53 53 53 53 53 53 53 288 288 288 288 288 288 119 119 119 119 52 52 52 52 52 115 115 115 115 115 115 
115 115 115 115 193 193 193 193 285 285 285 285 285 285 285 285 285 285 285 189 189 189 189 189 189 189 189 189 340 340 340 340 340 340 340 340 340 331 331 331 331 144 144 144 144 144 144 144 131 131 131 131 131 131 131 131 329 329 329 329 329 329 277 277 277 277 277 205 205 205 205 205 117 117 117 117 117 117 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 115 115 115 115 115 115 115 115 193 193 193 193 193 285 285 285 285 285 285 285 285 285 285 285 189 189 189 189 189 189 189 189 189 189 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 233 233 233 116 116 116 119 119 119 133 133 133 276 276 276 276 276 331 331 331 331 49 49 49 49 340 340 340 340 340 340 247 247 247 247 247 247 247 233 233 233 233 233 233 225 225 225 225 225 204 204 204 204 204 204 204 219 219 219 219 219 219 219 219 219 219 219 219 219 219 277 277 277 277 37 37 37 37 37 37 37 37 37 108 108 108 108 108 35 35 35 35 35 35 35 273 273 273 273 273 273 49 49 49 49 224 224 224 224 224 271 271 271 271 271 277 277 277 277 189 189 189 189 341 341 341 341 341 341 341 341 341 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 329 329 329 329 329 340 340 340 340 340 340 47 47 47 47 47 47 47 233 233 233 233 233 233 233 233 233 116 116 116 116 116 116 331 331 331 331 331 331 331 331 331 53 53 53 53 232 232 232 232 232 232 232 232 232 232 232 232 219 219 219 219 219 219 219 219 219 219 101 101 101 101 101 101 101 101 101 101 101 101 233 233 233 116 116 116 47 47 47 328 328 328 328 328 219 219 219 219 219 219 219 219 219 165 165 165 165 165 165 165 165 165 165 165 165 165 165 220 220 220 220 220 220 220 220 220 220 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 279 279 279 248 248 248 248 248 119 119 119 119 119 49 49 49 49 49 49 288 288 288 288 119 119 119 119 119 204 204 204 204 204 204 187 187 187 187 187 221 221 221 221 221 221 221 281 281 281 273 273 273 273 273 133 133 133 133 133 133 221 221 221 221 221 221 289 289 289 289 289 49 49 49 116 116 116 116 116 219 219 219 219 219 219 219 219 219 219 219 53 53 53 53 229 229 229 229 229 229 229 273 273 273 273 273 49 49 49 49 233 233 233 233 233 204 204 204 204 204 204 204 204 219 219 219 219 219 219 219 219 305 305 305 305 305 116 116 116 116 231 231 231 231 231 231 21 21 21 21 21 21 21 21 288 288 288 288 288 107 107 107 107 208 208 208 208 208 208 208 208 208 208 131 131 131 131 131 233 233 233 233 233 233 204 204 204 204 204 204 271 271 271 271 271 271 145 145 145 145 145 289 289 289 289 289 289 289 289 289 289 193 193 193 193 221 221 221 221 221 221 221 337 337 337 337 49 49 49 225 225 225 225 225 144 144 144 144 144 219 219 219 219 219 219 219 219 219 219 53 53 53 53 229 229 229 229 229 229 229 273 273 273 273 273 49 49 49 49 233 233 233 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 335 335 335 335 335 335 335 335 335 335 133 133 133 133 133 133 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 53 53 53 53 288 288 288 288 47 47 47 47 47 328 328 328 328 328 328 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 293 337 337 337 337 337 317 317 317 317 317 340 340 340 340 340 340 340 340 340 340 340 340 340 340 331 331 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 
101 101 288 288 288 288 288 288 288 288 288 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 21 21 21 21 21 21 225 225 225 225 225 225 225 225 225 225 225 225 225 225 144 144 144 144 144 144 144 144 144 144 144 47 47 47 233 233 233 116 116 116 119 119 119 52 52 52 52 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 69 69 69 277 277 277 277 277 277 277 277 277 49 49 49 49 49 224 224 224 224 224 224 224 227 227 227 227 227 227 227 227 227 227 227 227 133 133 133 133 133 133 133 133 133 133 133 133 133 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 189 189 189 189 340 340 340 340 340 340 340 340 340 275 275 275 275 275 275 275 165 165 165 165 165 165 165 165 165 113 113 113 113 113 113 49 49 49 49 49 49 49 224 224 224 224 224 224 331 331 331 331 49 49 49 49 340 340 340 340 340 175 175 175 175 175 175 193 193 193 193 289 289 289 289 189 189 189 189 236 236 236 236 236 236 236 236 236 236 171 171 171 171 171 171 171 171 171 171 171 171 171 171 171 133 133 133 133 133 277 277 277 277 277 277 277 277 277 225 225 225 225 225 225 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 115 115 115 115 115 115 115 115 193 193 193 193 193 341 341 341 341 341 341 341 341 341 341 341 204 204 204 204 204 204 204 204 204 331 331 331 331 331 331 331 193 193 193 193 120 120 120 119 119 119 119 189 189 189 189 189 280 280 280 280 280 280 280 280 280 47 47 47 47 233 233 233 233 233 233 233 233 233 337 337 337 337 337 337 337 337 337 337 321 321 321 321 321 321 321 321 321 345 345 345 345 345 345 345 345 345 333 333 333 333 49 49 49 224 224 224 224 224 224 224 227 227 227 227 227 227 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 281 281 289 289 289 289 289 145 145 145 145 145 145 204 204 204 204 204 47 47 47 47 109 109 109 109 109 85 85 85 85 85 85 85 85 288 288 288 288 288 288 288 288 288 219 219 219 219 219 219 219 219 219 219 219 219 219 333 333 333 333 333 101 101 101 101 101 101 101 101 101 101 101 101 101 101 49 49 49 49 49 49 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 47 47 47 47 47 47 47 47 47 233 233 233 233 233 233 233 229 229 229 229 229 229 229 229 229 197 197 197 197 197 281 281 281 281 281 281 281 281 281 281 281 281 281 289 289 289 289 289 193 193 193 277 277 277 277 277 205 205 205 205 205 205 205 205 205 205 205 49 49 49 49 49 49 280 280 280 280 280 280 280 280 175 175 175 175 175 277 277 277 277 277 209 209 209 209 209 209 209 209 209 209 232 232 232 232 232 232 232 232 175 175 175 175 175 175 165 165 165 165 165 165 165 165 165 165 165 165 165 109 109 109 109 49 49 49 49 225 225 225 225 225 225 225 225 225 225 225 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 175 175 175 175 175 175 305 305 305 305 116 116 116 116 116 207 207 207 207 207 207 207 329 329 329 233 233 233 233 189 189 189 189 236 236 236 236 236 275 275 275 275 275 275 275 165 165 165 165 165 113 113 113 113 49 49 49 49 49 49 49 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 17 17 17 17 277 277 277 193 193 193 
193 193 193 225 225 225 225 225 225 225 48 48 48 48 48 48 279 279 279 279 279 279 279 279 279 279 133 133 133 133 133 133 133 116 116 116 116 116 116 116 116 1 1 1 107 107 107 107 107 107 277 277 277 193 193 193 193 193 193 281 281 281 281 281 281 281 221 221 221 221 221 225 225 225 225 225 225 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 189 189 189 189 189 189 280 280 280 280 280 187 187 187 187 340 340 340 340 340 340 340 50 50 50 50 50 50 50 275 275 275 275 275 275 275 275 275 209 209 209 209 209 209 209 209 224 224 224 224 224 224 224 171 171 171 171 171 171 171 171 171 171 171 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 232 232 232 232 232 232 232 207 207 207 207 207 207 207 207 329 329 329 329 329 329 233 233 233 233 233 189 189 189 189 189 236 236 236 236 236 236 236 236 236 191 191 191 191 341 341 341 49 49 49 233 233 233 288 288 288 187 187 187 187 187 187 187 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 249 249 249 233 233 233 233 288 288 288 288 335 335 335 320 320 320 279 279 279 279 279 279 279 279 193 193 193 193 193 288 288 288 115 115 115 115 115 115 85 85 85 85 85 85 85 85 85 85 85 85 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 179 179 84 84 84 84 84 84 84 84 84 84 84 146 146 146 146 146 146 146 146 67 67 67 67 67 67 67 67 67 224 224 224 224 224 224 224 335 335 335 69 69 69 69 69 69 69 69 276 276 276 276 276 276 171 171 171 171 171 171 171 171 171 249 249 249 249 221 221 221 221 221 221 221 221 221 221 221 221 221 221 221 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 279 279 279 53 53 53 53 53 53 229 229 229 229 229 229 229 229 293 293 293 293 293 293 293 293 189 189 189 189 189 189 236 236 236 236 236 236 236 236 236 236 236 236 236 119 119 119 119 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 288 171 171 171 171 171 171 171 171 171 69 69 69 276 276 276 276 276 276 223 223 223 223 223 223 223 223 223 37 37 37 37 37 37 37 37 37 37 37 37 220 220 220 220 220 220 220 220 220 220 220 220 220 220 51 51 51 51 51 51 51 51 51 51 51 328 328 328 328 328 328 328 328 328 328 328 328 328 328 131 131 131 131 131 131 131 233 233 233 233 233 233 204 204 204 204 204 204 204 204 204 204 204 204 51 51 51 51 51 51 51 121 121 121 121 121 144 144 144 144 144 144 144 231 231 231 231 231 231 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 228 228 228 228 228 227 227 227 227 227 227 227 227 101 101 101 101 101 101 101 101 101 101 101 288 288 288 288 288 288 288 288 107 107 107 107 107 208 208 208 208 208 208 208 219 219 219 219 219 219 219 219 219 219 219 219 69 69 69 69 69 69 69 69 225 225 225 225 225 225 225 116 116 116 116 116 171 171 171 171 171 171 171 171 171 171 171 171 171 277 277 277 133 133 133 133 233 233 233 233 233 233 233 233 285 285 285 285 285 285 285 285 285 285 189 189 189 189 189 189 189 189 189 272 272 272 272 272 272 272 272 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 187 187 177 177 177 177 177 177 177 177 341 341 341 341 341 341 341 341 193 193 193 193 193 193 193 281 281 281 281 281 281 281 281 281 281 289 289 
+[data file content omitted: long rows of space-separated integer IDs (discrete unit label sequences), e.g. "289 289 289 49 49 49 116 116 …"; not human-readable and truncated here]
227 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 281 281 189 189 189 189 189 189 340 340 340 340 340 279 279 279 279 279 279 279 279 279 279 273 273 273 273 273 133 133 133 133 133 133 233 233 233 233 233 233 233 233 281 281 281 281 281 281 281 281 144 144 144 144 144 144 331 331 331 331 49 49 49 49 224 224 224 224 224 115 115 115 115 115 277 277 277 277 21 21 21 21 21 21 21 272 272 272 272 272 272 187 187 187 187 228 228 228 228 228 228 228 67 67 67 67 67 67 67 67 67 67 172 172 172 172 172 172 172 172 172 172 119 119 119 119 133 133 133 133 133 133 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 47 47 47 47 47 47 47 47 47 328 328 328 328 219 219 219 219 219 69 69 69 69 69 69 69 277 277 277 277 277 277 277 277 277 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 208 208 208 175 175 175 175 175 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 340 340 340 340 340 340 340 340 19 19 19 19 19 19 19 19 19 19 232 232 232 232 232 287 287 287 287 287 48 48 48 48 48 48 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 101 101 101 288 288 288 288 288 288 279 279 279 279 279 279 279 37 37 37 37 37 37 37 37 37 37 233 233 233 117 117 117 117 340 340 340 279 279 279 289 289 289 289 289 289 289 165 165 165 165 165 285 285 285 285 285 285 285 285 285 285 49 49 49 232 232 232 179 179 179 179 179 179 179 145 145 145 145 145 145 281 281 281 281 281 281 281 281 133 133 133 133 133 133 133 133 225 225 225 225 225 225 225 225 225 225 225 172 172 172 172 172 172 172 172 172 172 172 172 172 172 172 172 172 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 189 189 189 189 340 340 340 340 340 340 340 340 275 275 275 275 275 275 165 165 165 165 165 165 165 165 165 113 113 113 113 113 49 49 49 49 49 224 224 224 224 224 224 271 271 271 271 271 271 271 271 271 271 277 277 277 277 277 101 101 101 101 101 101 101 101 117 117 117 117 117 189 189 189 189 189 116 116 116 116 179 179 179 179 179 179 179 179 145 145 145 145 145 145 145 145 281 281 281 281 281 281 281 281 133 133 133 133 133 133 225 225 225 225 225 225 225 225 225 172 172 172 172 172 172 172 19 19 19 232 232 232 232 232 232 232 67 67 67 67 67 67 67 67 67 67 67 225 225 225 225 225 225 225 333 333 333 333 333 333 333 333 205 205 205 205 205 340 340 340 340 279 279 279 279 279 279 279 279 279 273 273 273 273 273 273 209 209 209 209 209 209 221 221 221 221 189 189 189 189 236 236 236 236 236 236 236 179 179 179 179 179 179 179 179 148 148 148 148 148 227 227 227 227 227 227 227 101 101 101 101 101 101 101 101 101 233 233 233 233 233 233 233 233 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 271 271 271 271 271 277 277 277 277 277 49 49 49 49 49 281 281 281 281 281 281 281 281 281 209 209 209 209 209 209 209 117 117 117 117 49 49 49 49 49 116 116 116 116 287 287 287 287 287 188 188 188 188 188 279 279 279 279 279 279 273 273 273 273 273 273 209 209 209 209 209 209 220 220 220 220 220 220 191 191 191 191 191 288 288 288 231 231 231 231 
231 231 231 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 329 329 329 329 329 189 189 189 236 236 236 236 236 236 236 236 47 47 47 47 47 47 47 217 217 217 217 217 217 217 217 217 53 53 53 53 53 53 53 53 281 281 281 281 281 281 281 289 289 289 289 289 289 49 49 49 49 49 116 116 116 179 179 179 179 179 179 179 179 144 144 144 144 227 227 227 227 227 227 227 227 227 227 227 133 133 133 133 233 233 233 233 233 233 289 289 289 49 49 49 49 224 224 224 224 224 224 224 224 224 35 35 35 35 35 35 35 35 35 35 289 289 289 289 289 49 49 49 49 49 49 289 289 289 289 289 289 289 289 289 325 325 325 325 325 325 325 325 325 325 116 116 116 287 287 287 287 287 287 188 188 188 188 188 119 119 119 119 189 189 189 189 280 280 280 280 280 280 280 280 280 47 47 47 47 47 229 229 229 229 229 229 229 165 165 165 165 165 165 165 165 165 165 165 165 165 341 341 341 341 341 341 341 341 189 189 189 189 236 236 236 236 236 236 236 236 236 271 271 271 271 271 271 271 271 209 209 209 209 209 209 209 209 280 280 280 280 280 280 280 47 47 47 47 328 328 328 328 328 328 231 231 231 231 337 337 337 337 337 337 337 337 321 321 321 321 321 321 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 133 133 133 133 133 133 133 133 133 133 224 224 224 224 224 224 224 224 227 227 227 227 227 227 227 227 227 17 17 17 277 277 277 277 277 277 277 193 193 193 193 193 225 225 225 225 225 225 225 225 48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 224 224 224 224 224 224 224 224 224 224 224 215 215 215 215 215 215 53 53 53 53 53 53 53 281 281 281 281 281 281 281 281 281 288 288 288 287 287 287 287 287 287 287 133 133 133 133 224 224 224 224 224 224 224 224 335 335 335 335 335 335 335 335 320 320 320 320 320 320 320 271 271 271 271 271 271 271 271 271 271 271 225 225 225 225 225 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 232 232 232 232 232 232 119 119 119 119 49 49 49 49 288 288 288 288 288 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 291 291 291 291 291 291 291 291 291 291 193 193 193 193 237 237 237 237 237 237 237 237 237 220 220 220 220 220 335 335 335 335 335 305 305 305 276 276 276 276 276 276 276 276 276 115 115 115 115 115 115 321 321 321 321 321 321 321 321 321 321 321 189 189 189 189 236 236 236 236 236 236 50 50 50 50 227 227 227 227 227 227 227 101 101 101 101 101 101 101 101 101 289 289 289 289 289 289 289 204 204 204 204 204 204 171 171 171 171 171 171 171 171 171 171 171 171 321 321 321 321 321 321 321 321 321 225 225 225 225 225 225 189 189 189 189 284 284 284 284 284 284 284 284 284 284 284 284 284 284 284 291 291 291 291 193 193 193 193 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 275 275 275 275 275 275 275 193 193 193 193 193 281 281 281 281 281 281 281 281 281 281 221 221 221 221 221 221 204 204 204 204 204 204 204 204 291 291 291 291 291 291 291 291 291 291 193 193 193 193 193 193 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 236 119 119 119 119 37 37 37 37 37 37 37 37 37 37 289 289 289 289 289 280 280 280 280 280 280 280 331 
331 331 331 331 331 331 53 53 53 53 53 53 53 53 53 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 335 335 335 335 335 335 335 320 320 320 320 320 115 115 115 115 115 115 249 249 249 249 249 249 249 249 233 233 233 288 288 288 231 231 231 231 231 248 248 248 248 248 248 248 248 248 248 248 331 331 331 331 331 331 331 331 53 53 53 53 53 288 288 288 288 335 335 335 335 305 305 305 305 276 276 276 276 276 276 175 175 175 175 175 175 175 175 133 133 133 133 133 289 289 289 289 289 289 189 189 189 189 189 236 236 236 236 236 236 236 236 236 236 236 236 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 335 335 335 335 335 335 335 335 335 335 335 305 305 305 276 276 276 276 276 276 276 276 276 276 107 107 107 107 107 107 107 277 277 277 277 277 193 193 193 193 193 237 237 237 237 237 189 189 189 189 236 236 236 236 236 236 236 236 236 50 50 50 50 50 50 279 279 279 279 279 279 279 279 279 279 289 289 289 289 289 289 289 277 277 277 165 165 165 165 165 165 165 233 233 233 233 233 233 233 216 216 216 216 216 216 216 216 216 216 216 111 111 111 111 111 111 111 111 101 101 101 101 101 101 101 101 101 101 101 101 101 225 225 225 225 225 225 116 116 116 116 116 116 187 187 187 187 187 233 233 233 233 233 289 289 289 289 289 289 320 320 320 335 335 335 69 69 69 69 69 69 69 276 276 276 276 276 179 179 179 179 179 179 179 85 85 85 85 85 85 85 85 85 85 85 280 280 280 280 280 280 280 47 47 47 233 233 233 116 116 116 179 179 179 179 249 249 249 249 249 249 249 249 249 249 249 249 249 249 228 228 228 228 228 228 228 228 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 233 233 116 116 116 335 335 335 335 335 320 320 320 320 320 320 320 115 115 115 115 115 115 115 115 249 249 249 249 249 249 249 249 249 249 249 249 232 232 232 232 231 231 231 231 248 248 248 248 248 248 248 248 248 248 248 50 50 50 50 50 50 279 279 279 279 279 279 279 279 279 279 279 279 279 279 193 193 193 237 237 237 237 237 237 237 237 237 237 177 177 177 49 49 49 49 224 224 224 224 224 224 224 291 291 291 291 291 291 193 193 193 193 236 236 236 236 236 236 236 47 47 47 47 109 109 109 109 109 85 85 85 85 85 85 85 85 85 85 85 85 288 288 288 288 179 179 179 179 179 193 193 193 193 228 228 228 228 228 231 231 231 231 231 231 231 231 69 69 69 69 69 69 276 276 276 276 276 276 276 331 331 331 331 331 331 331 331 331 331 53 53 53 53 288 288 288 179 179 179 189 189 189 340 340 340 340 340 340 115 115 115 115 115 197 197 197 197 281 281 281 281 281 281 273 273 273 273 273 49 49 49 49 49 49 341 341 341 341 341 341 193 193 193 193 285 285 285 285 285 285 285 285 285 285 49 49 49 232 232 232 232 187 187 187 187 187 340 340 340 340 340 340 340 340 223 223 223 223 223 223 101 101 101 101 101 101 101 101 101 101 101 101 220 220 220 220 220 220 220 220 220 231 231 231 231 231 231 231 231 69 69 69 69 69 69 69 69 276 276 276 276 276 276 276 276 331 331 331 331 331 331 331 331 53 53 53 53 53 53 53 288 288 288 288 288 288 288 279 279 279 279 279 279 69 69 69 69 277 277 277 277 277 288 288 288 47 47 47 47 328 328 328 328 328 328 271 271 271 271 271 271 271 271 271 271 271 271 133 133 133 133 133 277 277 277 277 277 277 277 277 277 277 277 277 49 49 49 49 49 233 233 233 233 289 289 289 289 280 280 280 179 179 179 179 179 208 208 208 208 208 208 208 208 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 37 37 37 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 231 231 231 231 231 231 231 231 231 231 
69 69 69 69 69 69 69 69 69 69 69 276 276 276 276 276 179 179 179 179 179 179 179 179 84 84 84 84 84 84 84 84 84 84 84 179 179 179 179 179 179 179 179 209 209 209 209 209 209 340 340 340 340 340 340 223 223 223 223 223 223 101 101 101 101 101 101 101 221 221 221 221 221 221 221 225 225 225 225 225 204 204 204 204 204 287 287 287 188 188 188 188 188 287 287 287 287 287 287 287 287 287 149 149 149 149 149 149 149 149 149 232 232 232 232 232 83 83 83 83 83 83 83 83 83 83 83 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 331 331 331 331 100 100 100 100 100 100 100 100 100 100 100 100 187 187 187 288 288 288 288 288 331 331 331 331 49 49 49 340 340 340 340 340 340 340 247 247 247 247 247 247 247 247 233 233 233 233 233 233 225 225 225 225 204 204 204 204 204 204 204 204 223 223 223 223 223 223 37 37 37 37 37 37 37 37 37 281 281 281 281 281 281 281 281 281 288 288 288 288 288 331 331 331 331 331 209 209 209 209 209 209 209 220 220 220 220 220 220 220 220 102 102 102 102 102 102 102 102 102 102 102 102 102 275 275 275 275 275 275 275 133 133 133 133 116 116 116 116 116 116 187 187 187 187 232 232 232 232 232 119 119 119 52 52 52 271 271 271 271 271 271 271 165 165 165 165 165 165 165 165 165 165 273 273 273 273 273 273 273 273 144 144 144 144 144 144 144 144 144 144 179 179 179 179 179 179 179 179 84 84 84 84 84 84 84 84 84 84 84 84 50 50 50 50 227 227 227 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 232 232 232 232 232 35 35 35 233 233 233 116 116 116 179 179 179 179 179 193 193 193 193 193 193 340 340 340 340 340 340 340 340 340 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 101 172 172 172 172 172 172 172 172 51 51 51 51 51 51 51 51 51 51 272 272 272 272 331 331 331 331 331 331 331 331 331 331 331 133 133 133 133 133 133 133 133 281 281 281 281 281 281 281 281 281 281 281 288 288 288 288 47 47 47 328 328 328 119 119 119 119 204 204 204 204 204 204 204 99 99 99 99 99 99 99 99 99 99 99 99 225 225 225 225 225 225 225 49 49 49 49 49 233 233 233 233 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 287 287 287 287 287 287 287 287 305 305 305 305 305 305 305 220 220 220 220 50 50 50 50 50 50 50 107 107 107 107 107 107 107 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 83 83 83 83 83 83 83 83 83 83 288 288 288 288 288 288 288 47 47 47 47 328 328 328 328 47 47 47 47 232 232 232 232 232 232 67 67 67 67 67 67 67 67 67 67 277 277 277 277 277 277 173 173 173 173 173 173 173 173 49 49 49 49 232 232 232 232 47 47 47 47 47 47 281 281 281 281 281 281 281 281 281 101 101 101 101 101 101 101 101 101 101 101 225 225 225 225 225 225 225 49 49 49 49 228 228 228 228 228 228 35 35 35 35 35 35 35 35 233 233 233 116 116 116 179 179 179 179 179 179 208 208 208 208 208 208 208 208 208 279 279 279 279 279 279 279 279 279 279 279 133 133 133 133 133 133 288 288 288 288 288 288 171 171 171 171 171 171 171 171 171 171 171 171 171 171 101 101 101 101 101 101 101 101 101 101 101 101 101 276 276 276 276 276 276 276 276 287 287 287 287 287 188 188 188 119 119 119 52 52 52 52 52 179 179 179 179 179 179 179 179 179 179 85 85 85 85 85 85 85 85 280 280 280 280 280 280 280 280 280 280 35 35 35 35 288 288 288 288 288 288 231 231 231 231 231 101 101 101 101 101 101 101 101 101 101 101 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 
279 279 279 279 279 279 279 279 133 133 133 133 133 288 288 288 288 288 288 187 187 187 187 187 187 288 288 288 288 288 288 288 288 288 288 19 19 19 19 19 19 19 19 19 19 232 232 232 232 232 232 232 232 232 232 271 271 271 271 271 271 271 271 271 271 149 149 149 149 149 149 149 149 273 273 273 273 273 49 49 49 49 49 49 49 49 280 280 280 280 280 280 280 280 280 227 227 227 227 17 17 17 277 277 277 277 277 277 277 193 193 193 193 193 193 225 225 225 225 225 225 48 48 48 48 48 48 48 48 48 48 48 48 48 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 233 233 233 233 116 116 116 116 116 231 231 231 231 231 193 193 193 193 193 193 277 277 277 277 277 277 277 225 225 225 225 225 225 204 204 204 204 204 204 204 204 204 107 107 107 107 107 107 107 107 107 107 149 149 149 149 149 149 149 149 149 233 233 233 233 233 288 288 288 288 119 119 119 49 49 49 49 49 228 228 228 228 228 287 287 287 287 287 287 287 287 320 320 320 320 320 320 50 50 50 50 50 50 219 219 219 219 219 219 219 219 219 277 277 277 193 193 193 193 281 281 281 281 281 281 281 281 281 272 272 272 272 272 272 187 187 187 187 232 232 232 232 119 119 119 133 133 133 133 276 276 276 276 276 276 276 107 107 107 107 107 133 133 133 133 133 133 133 133 133 117 117 117 117 117 117 117 117 117 117 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 116 116 102 102 102 102 102 102 102 102 102 102 102 231 231 231 231 231 231 231 248 248 248 248 248 248 248 248 248 248 248 47 47 47 47 233 233 233 233 233 233 233 233 233 53 53 53 53 53 121 121 121 121 121 121 144 144 144 144 144 219 219 219 219 219 219 219 219 219 219 165 165 165 165 165 165 165 165 165 165 165 280 280 280 280 280 280 280 280 280 280 331 331 331 331 133 133 133 133 276 276 276 276 276 276 276 47 47 47 47 232 232 232 232 47 47 47 47 47 47 117 117 117 117 117 21 21 21 21 21 21 21 21 21 21 21 273 273 273 273 289 289 289 289 289 49 49 49 49 49 49 116 116 116 116 116 116 107 107 107 107 107 107 264 264 264 264 264 264 264 264 264 264 264 264 335 335 335 335 335 335 335 335 335 335 335 335 321 321 321 321 321 321 321 341 341 341 341 341 341 341 116 116 116 287 287 287 287 48 48 48 48 48 279 279 279 279 279 279 279 279 279 279 279 279 279 279 53 53 53 53 53 53 220 220 220 220 220 220 220 220 220 220 220 220 220 119 119 119 119 119 119 204 204 204 204 204 204 204 204 204 131 131 131 131 131 131 131 131 131 177 177 177 177 177 177 177 177 177 177 177 177 177 177 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 164 164 164 164 164 164 164 164 164 219 219 219 219 219 219 219 219 219 219 219 305 305 305 305 305 305 117 117 117 117 117 49 49 49 49 49 49 233 233 233 233 233 288 288 288 288 107 107 107 107 107 277 277 277 277 165 165 165 165 220 220 220 220 220 220 220 220 179 179 179 193 193 193 193 228 228 228 228 51 51 51 51 51 51 51 328 328 328 328 328 328 187 187 187 187 187 187 187 187 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 191 191 191 191 191 191 191 191 191 191 191 191 191 191 191 191 172 172 172 172 172 172 172 335 335 335 335 335 320 320 320 320 320 320 320 320 179 179 179 179 179 179 179 179 179 179 37 37 37 116 116 116 116 116 35 35 35 35 35 35 35 35 35 35 281 281 281 281 281 281 281 281 288 288 288 
288 288 227 227 227 227 100 100 100 100 100 100 100 100 100 31 31 31 31 31 31 117 117 117 329 329 329 329 329 329 101 101 101 101 101 101 101 101 101 101 101 101 101 101 280 280 280 280 280 280 280 280 280 280 280 187 187 187 232 232 232 119 119 119 52 52 52 227 227 227 227 227 37 37 37 37 37 37 289 289 289 289 289 289 289 289 289 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 193 193 193 193 193 112 112 112 112 112 112 112 112 112 112 112 112 112 335 335 335 335 335 320 320 320 320 320 320 320 115 115 115 115 115 115 115 115 115 193 193 193 193 193 117 117 117 117 49 49 49 49 233 233 233 233 233 233 288 288 288 288 288 288 288 288 288 288 1 1 1 115 115 115 115 115 115 320 320 320 320 320 320 320 320 320 320 320 320 320 227 227 227 227 227 227 227 17 17 17 17 277 277 277 277 277 277 277 193 193 193 193 193 225 225 225 225 225 225 225 225 225 48 48 48 48 48 48 48 48 48 48 48 48 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 116 116 116 179 179 179 37 37 37 328 328 328 328 328 328 279 279 279 279 279 279 279 279 279 279 279 279 279 133 133 133 133 133 133 133 133 133 133 116 116 116 116 116 116 116 116 171 171 171 171 171 171 171 171 171 171 69 69 69 69 69 69 69 276 276 276 276 276 227 227 227 227 227 227 227 227 227 227 227 227 227 227 227 227 149 149 149 149 149 149 281 281 281 281 281 281 281 281 281 281 281 281 281 205 205 205 205 205 340 340 340 340 340 340 340 340 340 279 279 279 279 279 279 279 279 279 279 279 165 165 165 165 165 165 165 165 165 165 165 220 220 220 220 220 220 220 231 231 231 231 231 231 231 231 21 21 21 21 21 21 21 288 288 288 288 288 287 287 287 287 287 287 48 48 48 48 291 291 291 291 291 291 291 291 291 291 291 291 291 291 193 193 193 237 237 237 237 237 237 237 220 220 220 220 47 47 47 47 328 328 328 328 328 279 279 279 279 279 279 53 53 53 53 53 53 112 112 112 112 112 50 50 50 50 50 50 50 291 291 291 291 291 291 291 291 291 193 193 193 193 193 193 193 236 236 236 236 236 236 236 236 236 236 236 119 119 119 119 119 37 37 37 37 37 37 37 37 37 37 289 289 289 289 289 280 280 280 280 280 280 280 280 280 280 280 331 331 331 331 331 53 53 53 53 53 53 53 53 53 53 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 119 119 119 119 119 119 119 193 193 193 193 193 193 193 193 193 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 215 215 215 215 215 215 215 215 215 215 215 215 215 215 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 109 109 109 109 109 109 340 340 340 340 340 340 340 219 219 219 219 219 219 219 219 219 53 53 53 53 229 229 229 229 229 229 229 229 173 173 173 173 145 145 145 145 145 289 289 289 189 189 189 189 189 189 236 236 236 236 236 236 236 236 236 236 236 236 236 279 279 279 279 279 279 279 279 279 279 279 209 209 209 209 209 209 209 209 209 209 209 209 229 229 229 229 229 229 116 116 116 116 116 116 116 116 231 231 231 231 231 231 231 101 101 101 101 101 101 101 101 101 101 101 101 121 121 121 121 144 144 144 144 287 287 287 287 287 287 287 287 320 320 320 320 320 320 320 47 47 47 47 47 47 173 173 173 173 173 173 173 173 173 173 173 173 173 173 133 133 133 133 133 133 133 133 133 233 233 233 233 233 233 233 233 233 233 233 233 233 233 233 233 116 116 116 116 116 116 116 
116 116 116 231 231 231 231 231 231 69 69 69 69 69 69 276 276 276 276 276 276 287 287 287 287 287 287 287 287 320 320 320 320 320 320 320 47 47 47 47 47 47 47 225 225 225 225 225 225 225 225 21 21 21 21 21 21 21 21 21 21 277 277 277 277 277 228 228 228 228 228 228 227 227 227 227 227 17 17 17 277 277 277 277 277 277 193 193 193 193 193 193 225 225 225 225 225 225 225 225 225 48 48 48 48 48 48 48 48 48 48 48 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 231 231 231 231 231 193 193 193 193 193 193 289 289 289 289 289 189 189 189 189 189 189 116 116 116 116 116 279 279 279 279 279 279 279 279 279 289 289 289 289 289 289 289 133 133 133 133 133 117 117 117 117 49 49 49 49 225 225 225 225 225 225 204 204 204 204 204 204 204 204 204 204 204 204 204 67 67 67 67 67 67 67 67 67 67 67 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 115 115 115 115 115 115 115 249 249 249 249 249 249 249 249 233 233 233 288 288 288 115 115 115 115 189 189 189 189 189 189 233 233 233 233 233 233 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 119 119 119 119 133 133 133 277 277 277 277 277 340 340 340 340 340 279 279 279 279 279 279 53 53 53 53 53 229 229 229 229 229 229 293 293 293 293 293 293 189 189 189 189 236 236 236 236 236 236 187 187 187 187 232 232 232 232 331 331 331 331 331 331 53 53 53 53 53 288 288 288 288 288 288 288 335 335 335 335 320 320 320 320 320 279 279 279 279 279 279 279 279 279 279 279 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 275 275 275 275 275 275 275 165 165 165 165 165 165 165 165 165 113 113 113 113 113 113 49 49 49 49 49 49 49 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 328 328 328 328 328 328 179 179 179 179 179 179 179 179 37 37 37 37 37 37 116 116 116 116 116 116 279 279 279 279 279 279 53 53 53 53 228 228 228 228 228 228 228 228 219 219 219 219 219 219 219 219 333 333 333 333 333 333 21 21 21 21 21 21 21 21 225 225 225 225 229 229 229 229 229 229 229 229 340 340 340 340 340 340 340 340 227 227 227 227 227 227 105 105 105 105 105 105 105 105 281 281 281 281 281 281 281 281 281 281 133 133 133 133 133 225 225 225 225 225 225 225 225 225 225 225 172 172 172 172 172 172 172 172 172 172 172 172 172 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 53 53 53 53 53 288 288 288 288 288 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 293 337 337 337 337 337 337 316 316 316 316 316 316 316 316 331 331 331 331 331 49 49 49 49 340 340 340 340 340 340 340 340 287 287 287 287 287 287 287 287 287 287 133 133 133 133 277 277 277 277 277 277 277 277 49 49 49 49 49 109 109 109 49 49 49 224 224 224 224 224 224 279 279 279 279 279 279 279 279 279 279 279 133 133 133 133 133 288 288 288 288 288 288 288 19 19 19 19 19 19 19 19 232 232 232 232 232 187 187 187 187 187 187 187 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 219 219 219 219 219 219 219 305 305 305 116 116 116 116 116 116 279 279 279 279 279 279 279 279 279 279 208 
208 208 208 208 208 208 208 208 208 119 119 119 119 119 37 37 37 37 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 288 288 288 279 279 279 279 279 279 279 248 248 248 248 248 248 248 248 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 175 175 175 175 175 175 165 165 165 165 165 165 165 165 165 165 165 328 328 328 328 328 328 328 191 191 191 191 191 191 191 191 191 191 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 187 187 187 289 289 289 289 289 280 280 280 279 279 279 279 279 279 279 279 248 248 248 248 248 248 248 248 248 248 248 279 279 279 279 279 279 279 279 279 279 279 279 133 133 133 133 133 225 225 225 225 225 225 225 225 225 117 117 117 117 117 117 49 49 49 49 49 228 228 228 228 228 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 293 293 293 293 293 293 337 337 337 337 337 316 316 316 316 316 279 279 279 279 279 279 279 279 279 279 279 133 133 133 133 133 133 289 289 289 289 289 289 280 280 280 179 179 179 189 189 189 340 340 340 340 340 340 340 227 227 227 227 227 227 227 227 101 101 101 101 101 101 101 101 101 101 101 101 233 233 233 116 116 116 116 19 19 19 19 19 19 19 19 19 232 232 232 232 232 232 131 131 131 131 233 233 233 233 233 233 205 205 205 205 205 293 293 293 293 293 293 293 293 293 293 293 197 197 197 197 197 236 236 236 236 236 236 236 236 236 236 236 236 119 119 119 119 49 49 49 49 49 49 288 288 288 288 288 288 288 288 288 331 331 331 331 133 133 133 133 133 232 232 232 179 179 179 179 179 179 179 179 179 179 208 208 208 208 208 208 115 115 115 115 115 115 115 115 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 53 340 340 340 340 340 340 340 340 102 102 102 102 102 102 102 102 102 102 102 102 102 67 67 67 67 67 67 225 225 225 225 225 225 333 333 333 333 333 333 205 205 205 205 340 340 340 340 340 340 340 340 340 340 340 171 171 171 171 171 209 209 209 209 209 209 209 209 209 209 224 224 224 224 224 224 187 187 187 187 289 289 289 289 280 280 280 280 280 227 227 227 227 227 227 100 100 100 100 100 100 100 100 100 115 115 115 115 115 115 337 337 337 337 337 321 321 321 321 321 321 321 289 289 289 289 289 289 204 204 204 204 204 204 204 204 204 287 287 287 287 287 287 188 188 188 188 175 175 175 175 175 175 175 193 193 193 193 328 328 328 328 191 191 191 191 191 191 191 191 191 191 191 232 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 233 233 233 233 116 116 116 116 116 116 35 35 35 35 35 35 35 340 340 340 340 340 340 340 340 340 340 340 340 171 171 171 171 171 144 144 144 144 144 144 144 119 119 119 119 52 52 52 52 52 275 275 275 275 275 275 275 275 275 193 193 193 193 193 193 281 281 281 281 281 281 281 281 281 281 281 281 220 220 220 220 220 220 220 220 220 220 220 220 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 133 133 133 133 133 277 277 277 277 277 277 277 340 340 340 340 340 340 340 340 340 340 340 275 275 275 275 275 193 193 193 193 193 281 281 281 281 281 281 281 281 281 221 221 221 221 221 221 280 280 280 280 280 187 187 187 187 232 232 232 232 232 232 232 232 271 271 271 271 271 271 271 277 277 277 277 193 193 193 289 289 289 204 204 204 204 204 204 231 231 231 231 231 193 193 193 193 193 193 193 193 276 276 276 276 276 276 276 276 131 131 131 131 131 131 131 131 131 131 329 329 329 329 329 329 329 277 277 277 277 205 205 205 205 
205 205 205 205 293 293 293 293 293 293 293 293 197 197 197 197 197 197 236 236 236 236 236 236 236 50 50 50 50 50 107 107 107 107 107 107 107 107 107 107 21 21 21 21 21 21 21 21 117 117 117 117 204 204 204 204 204 204 204 204 115 115 115 115 115 115 115 115 53 53 53 53 53 53 53 53 53 53 340 340 340 340 340 340 187 187 187 187 232 232 232 119 119 119 119 189 189 189 189 189 189 280 280 280 280 280 280 280 280 280 280 280 331 331 331 331 331 331 149 149 149 149 149 225 225 225 225 225 225 225 225 225 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 133 133 133 133 133 277 277 277 277 277 277 277 340 340 340 340 340 340 340 340 340 340 340 340 275 275 275 275 275 275 275 193 193 193 193 193 281 281 281 281 281 281 281 281 281 281 281 221 221 221 221 221 221 280 280 280 280 280 187 187 187 232 232 232 232 232 232 271 271 271 271 271 271 271 271 209 209 209 209 209 209 209 273 273 273 273 273 49 49 49 225 225 225 225 225 340 340 340 340 340 340 179 179 179 179 179 37 37 37 37 37 37 37 329 329 329 329 189 189 189 189 236 236 236 236 236 236 236 236 111 111 111 111 111 111 111 111 193 193 193 193 193 225 225 225 225 225 225 117 117 117 117 117 277 277 277 49 49 49 232 232 232 232 232 47 47 47 47 47 328 328 328 119 119 119 119 133 133 133 133 133 276 276 276 276 276 276 276 276 276 276 276 247 247 247 247 247 247 247 247 247 247 247 247 247 247 247 247 232 232 232 232 232 232 232 232 191 191 191 191 172 172 172 172 172 172 172 172 191 191 191 288 288 288 219 219 219 219 219 219 53 53 53 53 53 229 229 229 229 229 229 229 229 229 229 340 340 340 340 340 340 287 287 287 287 48 48 48 48 48 119 119 119 119 119 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 164 164 164 164 164 164 164 164 164 164 164 164 164 115 115 115 115 115 249 249 249 249 249 249 249 249 249 249 249 249 233 233 233 288 288 288 288 288 67 67 67 67 67 67 67 225 225 225 225 333 333 333 333 333 333 333 205 205 205 205 205 340 340 340 340 340 287 287 287 287 287 287 287 287 287 149 149 149 149 149 149 149 149 232 232 232 232 232 83 83 83 83 83 83 83 83 83 83 83 288 288 288 288 331 331 331 331 331 133 133 133 133 133 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 233 233 233 233 116 116 116 116 116 116 119 119 119 119 133 133 133 133 133 133 133 133 133 232 232 232 232 231 231 231 231 231 231 231 231 231 249 249 249 249 249 249 249 249 249 329 329 329 329 329 48 48 48 48 48 279 279 279 279 279 279 279 279 279 221 221 221 221 221 249 249 249 249 249 249 249 249 249 249 285 285 285 285 285 285 285 285 285 48 48 48 187 187 187 187 340 340 340 340 340 340 340 340 275 275 275 275 275 101 101 101 101 101 101 101 101 101 101 288 288 288 219 219 219 219 219 219 219 219 219 219 225 225 225 225 249 249 249 249 249 249 280 280 280 280 280 280 280 287 287 287 287 287 287 287 48 48 48 119 119 119 119 204 204 204 204 204 204 204 204 204 99 99 99 99 99 99 99 99 99 99 99 99 225 225 225 225 225 225 225 49 49 49 49 49 233 233 233 233 116 116 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 191 191 191 191 191 191 191 191 191 191 288 288 288 288 288 191 191 191 191 191 341 341 341 341 341 341 341 49 49 49 49 49 233 233 233 288 
288 288 288 131 131 131 131 340 340 340 340 340 340 340 340 340 187 187 187 187 187 187 187 172 172 172 172 172 172 172 331 331 331 208 208 208 208 208 331 331 331 331 331 331 331 144 144 144 144 144 144 175 175 175 175 175 175 175 133 133 133 133 289 289 289 289 289 189 189 189 189 189 236 236 236 236 236 236 236 179 179 179 179 179 193 193 193 193 193 228 228 228 228 228 228 171 171 171 171 171 171 171 145 145 145 145 145 228 228 228 228 191 191 191 191 191 191 191 191 191 237 237 237 237 237 237 237 237 237 237 237 177 177 177 177 177 225 225 225 225 225 49 49 49 49 49 233 233 233 116 116 116 116 116 67 67 67 67 67 67 276 276 276 276 276 119 119 119 119 52 52 52 52 279 279 279 279 279 279 279 279 279 289 289 289 289 289 289 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 289 289 289 289 289 289 289 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 208 208 208 208 219 219 219 219 219 219 219 219 219 37 37 37 37 37 37 37 37 233 233 233 288 288 288 107 107 107 107 204 204 204 204 204 204 204 227 227 227 227 227 227 227 227 227 227 53 53 53 53 53 53 112 112 112 112 112 112 112 112 112 112 112 112 112 112 115 115 115 193 193 193 193 173 173 173 173 173 173 277 277 277 49 49 49 233 233 233 288 288 288 288 171 171 171 171 171 145 145 145 145 228 228 228 228 15 15 15 15 277 277 277 277 277 277 277 277 281 281 281 281 281 281 281 133 133 133 133 133 133 133 225 225 225 225 225 225 225 225 225 329 329 329 329 329 329 329 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 133 133 133 133 133 133 224 224 224 224 224 224 224 224 102 102 102 102 102 102 102 102 102 102 102 102 102 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 249 249 249 249 249 249 249 249 249 249 272 272 272 272 272 272 272 187 187 187 288 288 288 288 288 331 331 331 49 49 49 224 224 224 224 224 224 224 287 287 287 287 287 287 287 287 287 287 149 149 149 149 149 232 232 232 232 232 83 83 83 83 83 83 83 83 288 288 288 288 288 288 288 67 67 67 67 67 67 67 67 224 224 224 224 224 224 224 275 275 275 275 275 101 101 101 101 101 101 101 101 101 101 101 101 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 133 133 133 133 133 133 116 116 116 116 227 227 227 227 227 227 193 193 193 193 281 281 281 281 281 281 281 189 189 189 189 189 340 340 340 340 340 340 340 340 340 275 275 275 275 275 165 165 165 165 165 165 165 165 113 113 113 113 113 49 49 49 49 49 49 49 224 224 224 224 224 224 224 224 187 187 187 187 232 232 232 232 50 50 50 50 287 287 287 287 287 287 287 287 287 287 287 287 287 249 249 249 249 249 249 249 249 249 249 232 232 232 232 232 119 119 119 49 49 49 288 288 288 288 288 288 271 271 271 271 271 271 271 271 271 271 271 271 225 225 225 225 225 165 165 165 165 165 165 165 165 165 233 233 233 233 233 225 225 225 225 204 204 204 204 204 204 204 204 204 191 191 191 191 191 191 191 191 233 233 233 233 117 117 117 117 117 49 49 49 49 221 221 221 221 221 221 221 221 169 169 169 169 169 169 169 169 289 289 289 289 289 189 189 189 189 116 116 116 116 179 179 179 179 179 179 179 144 144 144 144 144 144 271 271 271 271 271 271 271 271 271 271 271 271 
271 271 271 165 165 165 165 165 165 165 165 233 233 233 233 233 233 233 233 233 233 233 173 173 173 173 173 173 173 49 49 49 49 49 224 224 224 224 224 224 224 224 224 115 115 115 115 115 115 115 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 289 289 289 289 289 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 247 247 247 247 247 247 247 247 247 247 247 233 233 233 233 233 233 233 233 233 233 233 233 225 225 225 225 204 204 204 204 204 204 204 204 204 204 115 115 115 115 115 115 115 115 115 249 249 249 249 249 249 249 249 249 249 249 249 233 233 233 288 288 288 288 288 279 279 279 279 279 279 279 279 279 279 279 279 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 102 102 102 102 102 102 102 102 102 102 115 115 115 115 115 115 193 193 193 193 193 117 117 117 49 49 49 232 232 232 232 232 331 331 331 331 331 331 331 331 69 69 69 69 69 277 277 277 277 277 277 232 232 232 335 335 335 335 335 335 320 320 320 320 320 320 187 187 187 187 187 172 172 172 172 179 179 179 179 179 208 208 208 208 208 208 107 107 107 107 107 107 107 107 107 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 233 233 233 233 233 233 233 340 340 340 340 340 340 340 340 175 175 175 175 175 175 277 277 277 277 277 277 209 209 209 209 209 209 209 209 209 232 232 232 232 232 232 232 232 232 175 175 175 175 175 165 165 165 165 165 165 165 165 165 109 109 109 109 109 49 49 49 49 225 225 225 225 225 225 225 225 225 340 340 340 340 115 115 115 115 115 115 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 232 232 232 232 232 232 146 146 146 146 146 146 146 146 271 271 271 271 271 271 271 271 305 305 305 305 305 289 289 289 289 289 289 280 280 280 279 279 279 279 279 279 279 279 279 289 289 289 289 289 277 277 277 193 193 193 193 193 221 221 221 221 221 221 221 221 233 233 233 233 233 105 105 105 105 105 105 105 105 105 232 232 232 232 232 187 187 187 187 232 232 232 232 119 119 119 52 52 52 52 331 331 331 331 331 331 133 133 133 133 133 133 133 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 179 179 179 179 179 149 149 149 149 149 149 149 149 116 116 116 47 47 47 328 328 328 328 50 50 50 50 50 219 219 219 219 219 219 219 219 165 165 165 165 165 165 165 165 165 165 165 165 165 280 280 280 280 280 280 280 280 280 280 280 247 247 247 247 247 247 247 247 247 329 329 329 329 329 329 329 329 329 144 144 144 144 144 187 187 187 187 232 232 232 231 231 231 231 337 337 337 337 337 337 320 320 320 320 320 320 320 320 107 107 107 107 107 107 277 277 277 277 53 53 53 53 233 233 233 233 233 233 233 233 233 341 341 341 341 341 341 333 333 333 333 333 189 189 189 189 189 189 189 220 220 220 220 220 220 220 331 331 331 133 133 133 276 276 276 47 47 47 232 232 232 232 232 232 232 67 67 67 67 67 67 277 277 277 277 277 173 173 173 173 173 173 49 49 49 49 232 232 232 232 47 47 47 47 47 281 281 281 281 281 281 281 281 281 101 101 101 101 101 101 101 101 101 225 225 225 225 225 49 49 49 49 228 228 228 228 228 228 111 111 111 111 111 111 111 111 111 111 101 101 101 101 101 101 101 101 101 101 225 225 225 225 225 225 225 116 116 116 115 115 115 115 115 193 193 193 193 116 116 116 116 116 116 116 116 119 119 119 119 119 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 47 47 47 47 47 47 47 47 47 47 233 233 233 116 116 116 119 119 119 52 52 
52 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 249 249 249 249 249 249 249 249 249 249 224 224 224 224 224 171 171 171 171 171 171 171 171 171 171 171 171 37 37 37 37 37 37 37 37 37 37 229 229 229 229 225 225 225 225 225 225 204 204 204 204 204 204 204 115 115 115 115 115 115 115 101 101 101 101 101 101 101 101 101 101 116 116 116 116 116 116 187 187 187 232 232 232 232 232 232 232 171 171 171 171 171 171 171 171 171 171 171 171 171 171 171 171 193 193 193 193 193 277 277 277 277 277 277 173 173 173 173 173 173 49 49 49 49 224 224 224 224 224 224 224 224 224 224 35 35 35 35 35 35 35 35 35 177 177 177 177 177 49 49 49 233 233 233 233 205 205 205 205 205 205 205 205 205 205 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 247 247 247 247 247 247 247 247 247 247 247 247 247 247 247 247 233 233 233 233 233 225 225 225 225 204 204 204 204 191 191 191 191 288 288 288 288 288 288 331 331 331 331 49 49 49 340 340 340 340 340 50 50 50 50 175 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 149 224 224 224 224 224 224 224 224 224 187 187 187 187 232 232 232 119 119 119 119 37 37 37 37 37 37 288 288 288 288 288 191 191 191 191 191 191 233 233 233 233 233 233 281 281 281 281 281 281 281 289 289 289 289 289 49 49 49 233 233 233 233 233 233 233 233 233 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 331 331 331 133 133 133 133 133 224 224 224 224 224 224 224 224 331 331 331 331 331 331 331 148 148 148 148 148 148 231 231 231 231 231 231 21 21 21 21 21 21 21 288 288 288 288 288 288 288 288 175 175 175 175 175 175 175 133 133 133 133 289 289 289 189 189 189 189 236 236 236 236 236 236 236 50 50 50 50 50 175 175 175 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 133 133 133 133 116 116 116 116 227 227 227 227 227 17 17 17 277 277 277 277 277 277 193 193 193 193 193 193 225 225 225 225 225 225 225 225 225 48 48 48 48 48 48 48 48 48 48 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 340 340 340 340 340 340 340 340 340 191 191 191 191 191 191 172 172 172 172 172 271 271 271 271 271 271 271 271 265 265 265 265 265 265 265 265 265 265 265 265 265 341 341 341 341 341 341 49 49 49 233 233 233 189 189 189 189 236 236 236 236 236 236 236 236 236 236 331 331 331 331 331 331 133 133 133 133 133 133 133 225 225 225 225 225 225 225 225 225 225 225 340 340 340 340 340 340 340 340 340 331 331 331 331 331 331 331 331 331 144 144 144 50 50 50 50 50 50 271 271 271 271 271 271 271 271 337 337 337 337 337 337 337 305 305 305 305 277 277 277 277 277 277 277 277 225 225 225 225 225 204 204 204 204 204 204 204 204 171 171 171 171 171 171 171 171 171 171 171 171 171 171 171 133 133 133 133 133 229 229 229 229 229 229 229 49 49 49 233 233 233 49 49 49 49 49 232 232 232 232 232 47 47 47 47 221 221 221 221 221 221 221 221 221 221 21 21 21 229 229 229 229 229 229 273 273 273 225 225 225 189 189 189 189 285 285 285 285 285 285 285 285 285 285 229 229 229 229 229 49 49 49 49 233 233 233 233 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 47 47 47 47 47 47 47 47 47 47 47 47 47 233 233 233 116 116 116 231 
231 231 231 231 231 21 21 21 21 21 21 21 21 288 288 288 287 287 287 287 287 188 188 188 107 107 107 107 204 204 204 204 204 204 204 204 115 115 115 115 115 115 115 115 277 277 277 277 277 133 133 133 133 133 117 117 117 117 189 189 189 189 189 116 116 116 116 116 116 116 187 187 187 232 232 232 119 119 119 52 52 52 52 219 219 219 219 219 219 219 219 219 219 165 165 165 165 165 165 165 165 165 280 280 280 280 280 280 280 280 280 280 47 47 47 47 328 328 328 328 328 50 50 50 50 50 107 107 107 107 107 107 107 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 99 99 99 99 99 99 99 99 99 99 99 99 99 99 116 116 116 231 231 231 231 231 133 133 133 133 133 133 329 329 329 329 329 144 144 144 144 144 144 144 115 115 115 115 115 115 115 115 277 277 277 277 277 277 209 209 209 209 209 209 209 209 209 228 228 228 228 228 47 47 47 328 328 328 328 328 287 287 287 287 287 287 287 287 287 287 165 165 165 165 165 165 221 221 221 221 221 221 189 189 189 236 236 236 236 236 236 236 236 50 50 50 50 50 175 175 175 175 175 175 175 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 224 224 224 224 224 224 224 224 224 224 287 287 287 48 48 48 48 107 107 107 107 107 277 277 277 193 193 193 193 193 193 236 236 236 236 236 236 51 51 51 51 51 51 51 51 51 51 51 51 272 272 272 272 272 272 272 272 272 272 272 272 272 272 272 272 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 331 331 331 331 331 331 331 331 331 331 331 53 53 53 53 233 233 233 233 233 233 117 117 117 117 117 144 144 144 144 144 35 35 35 288 288 288 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 189 189 189 340 340 340 340 39 39 39 39 39 39 225 225 225 225 225 49 49 49 49 49 49 177 177 177 177 177 341 341 341 341 341 37 37 37 37 37 37 37 37 233 233 233 233 233 117 117 117 117 117 144 144 144 144 144 279 279 279 279 279 279 279 279 279 273 273 273 273 273 273 133 133 133 133 233 233 233 233 233 233 233 233 233 281 281 281 281 281 281 281 281 144 144 144 144 144 171 171 171 171 171 144 144 144 144 144 115 115 115 115 115 115 321 321 321 321 321 321 321 189 189 189 189 189 189 189 236 236 236 236 236 236 236 191 191 191 191 191 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 53 53 53 53 288 288 288 288 288 119 119 119 133 133 133 133 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 208 208 208 208 208 208 208 208 208 208 208 331 331 331 331 331 305 305 305 305 305 117 117 117 49 49 49 49 49 233 233 233 288 288 288 283 283 283 283 283 283 283 283 283 283 277 277 277 277 193 193 193 193 237 237 237 237 237 237 237 237 220 220 220 220 220 220 171 171 171 171 145 145 145 228 228 228 228 47 47 47 47 117 117 117 117 117 117 117 21 21 21 21 21 21 21 21 21 273 273 273 273 273 289 289 289 189 189 189 189 236 236 236 236 236 236 236 50 50 50 50 50 179 179 179 179 179 179 179 179 249 249 249 249 249 249 249 249 249 249 249 249 249 224 224 224 224 224 224 224 1 1 1 67 67 67 67 67 67 67 67 67 67 277 277 277 277 277 173 173 173 173 173 173 173 173 173 49 49 49 49 232 232 232 47 47 47 47 47 281 281 281 281 281 281 281 281 281 101 101 101 101 101 101 101 101 101 101 225 225 225 225 225 225 225 49 49 49 228 228 228 
187 187 187 187 187 187 172 172 172 172 283 283 283 283 283 208 208 208 208 208 287 287 287 287 287 287 305 305 305 305 305 305 220 220 220 220 220 220 191 191 191 288 288 288 288 288 187 187 187 187 187 233 233 233 233 233 233 289 289 289 289 289 289 289 289 320 320 320 320 179 179 179 179 179 179 179 144 144 144 144 144 144 144 179 179 179 179 179 179 179 179 133 133 133 133 133 133 133 133 133 133 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 227 193 193 193 193 281 281 281 281 281 281 281 189 189 189 189 189 340 340 340 340 340 340 340 340 275 275 275 275 275 275 165 165 165 165 165 165 165 165 113 113 113 113 113 49 49 49 49 49 49 224 224 224 224 331 331 331 331 331 331 305 305 305 116 116 116 179 179 179 37 37 37 328 328 328 328 223 223 223 223 223 101 101 101 101 101 101 101 101 221 221 221 221 288 288 288 287 287 287 188 188 188 188 188 279 279 279 279 279 279 279 279 289 289 289 289 289 289 164 164 164 164 164 164 164 164 164 47 47 47 233 233 233 233 289 289 289 289 289 289 289 193 193 193 224 224 224 224 224 227 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 337 337 337 337 316 316 316 316 316 219 219 219 219 219 219 219 219 219 165 165 165 165 165 165 165 165 165 228 228 228 228 228 228 228 228 179 179 179 249 249 249 249 249 249 249 228 228 228 228 228 228 331 331 331 331 189 189 189 120 120 120 179 179 179 189 189 189 340 340 340 340 340 187 187 187 187 229 229 229 229 229 273 273 273 273 273 273 273 273 69 69 69 69 69 277 277 277 277 277 289 289 289 189 189 189 189 116 116 116 116 116 116 116 116 116 116 67 67 67 67 67 67 67 67 277 277 277 277 277 277 173 173 173 173 173 173 173 173 49 49 49 49 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 53 53 53 53 53 53 288 288 288 288 288 288 275 275 275 275 49 49 49 49 49 49 173 173 173 173 173 173 173 225 225 225 133 133 133 133 133 221 221 221 221 221 221 221 221 289 289 289 289 289 189 189 189 189 189 236 236 236 236 236 236 236 236 119 119 119 119 49 49 49 49 288 288 288 288 191 191 191 288 288 288 288 288 331 331 331 331 305 305 305 305 116 116 116 116 107 107 107 107 107 208 208 208 208 208 208 208 208 208 208 208 50 50 50 50 50 175 175 175 175 175 175 175 175 175 175 175 305 305 305 305 305 305 305 116 116 116 116 116 116 116 116 116 287 287 287 287 287 287 287 287 287 287 287 320 320 320 320 320 320 320 320 320 320 320 320 83 83 83 83 83 83 83 83 83 83 83 83 83 83 83 83 277 277 277 277 277 277 277 277 340 340 340 340 340 340 340 340 35 35 35 35 35 288 288 288 288 288 288 223 223 223 223 223 223 209 209 209 209 209 209 209 209 209 281 281 281 281 281 281 281 288 288 288 107 107 107 107 107 189 189 189 189 189 173 173 173 173 173 173 173 173 69 69 69 69 69 69 276 276 276 276 179 179 179 179 179 179 189 189 189 340 340 340 340 340 340 340 143 143 143 143 143 143 143 143 143 143 143 143 101 101 101 101 101 101 101 101 101 101 101 329 329 329 329 49 49 49 49 49 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 219 219 219 219 219 49 49 49 49 49 233 233 233 233 221 221 221 221 221 221 221 225 225 225 225 225 321 321 321 321 321 321 117 117 117 117 117 189 189 189 189 
+[Added data file: long lines of space-separated integer labels (apparently discrete speech-unit / k-means cluster IDs, one utterance per line, with long runs of `1` that look like padding or silence units); the raw label sequences are not reproduced here as they are not human-readable.]
193 236 236 236 236 236 236 179 179 179 179 193 193 193 193 228 228 228 228 228 228 228 228 247 247 247 247 247 247 247 247 329 329 329 329 329 329 329 329 144 144 144 144 144 171 171 171 171 171 171 171 171 145 145 145 145 228 228 228 228 231 231 231 231 231 231 231 249 249 249 249 249 249 249 329 329 329 329 48 48 48 48 48 279 279 279 279 279 279 279 279 279 221 221 221 221 221 249 249 249 249 249 249 249 249 285 285 285 285 285 285 285 285 285 285 285 48 48 48 48 48 48 171 171 171 171 171 171 171 171 171 171 69 69 69 69 69 276 276 276 276 276 227 227 227 227 227 227 227 208 208 208 208 208 208 208 208 208 208 208 208 208 208 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 48 48 48 48 48 279 279 279 279 279 279 279 279 289 289 289 289 289 289 165 165 165 165 165 165 165 165 285 285 285 285 285 285 285 285 285 285 49 49 49 49 233 233 233 233 229 229 229 229 229 229 41 41 41 41 41 41 41 41 41 41 41 281 281 281 281 281 281 281 281 289 289 289 289 289 289 144 144 144 144 144 144 144 144 331 331 331 331 331 331 331 193 193 193 193 281 281 281 281 281 281 281 281 281 49 49 49 49 49 225 225 225 225 225 225 116 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 175 175 175 175 175 175 175 175 175 133 133 133 133 133 133 133 133 133 133 280 280 280 280 280 280 280 280 280 280 119 119 119 119 119 133 133 133 133 133 277 277 277 277 277 277 340 340 340 340 340 279 279 279 279 279 279 279 279 53 53 53 53 53 53 53 228 228 228 228 228 228 227 227 227 227 189 189 189 189 189 281 281 281 281 281 281 281 281 281 281 289 289 289 289 289 289 165 165 165 165 165 165 165 165 165 165 165 165 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 179 208 208 208 208 208 208 279 279 279 279 279 279 279 279 279 133 133 133 133 133 133 133 133 133 133 133 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 281 189 189 189 189 340 340 340 340 279 279 279 279 279 273 273 273 273 273 273 133 133 133 133 133 133 233 233 233 233 233 233 281 281 281 281 281 281 281 144 144 144 144 144 144 219 219 219 219 219 219 219 219 165 165 165 165 165 165 165 165 165 228 228 228 228 228 228 228 228 67 67 67 67 67 67 67 67 172 172 172 172 172 172 172 172 119 119 119 119 52 52 52 52 287 287 287 287 287 287 287 287 277 277 277 277 165 165 165 165 165 165 165 232 232 232 232 232 232 331 331 331 331 331 189 189 189 189 292 292 292 119 119 119 119 119 37 37 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 175 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 224 224 224 224 224 224 224 224 224 224 224 224 47 47 47 47 233 233 233 233 116 116 116 175 175 175 175 175 175 165 165 165 165 165 165 165 165 165 165 165 328 328 328 328 179 179 179 179 179 179 179 144 144 144 144 144 144 144 187 187 187 187 187 233 233 233 233 289 289 289 289 289 48 48 48 227 227 227 227 227 227 227 100 100 100 100 100 100 100 100 100 100 111 111 111 111 111 111 111 111 111 111 111 21 21 21 21 21 21 21 21 21 21 21 277 277 277 277 277 277 277 277 277 277 277 216 216 216 216 216 216 216 216 216 216 216 216 216 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 279 279 279 133 133 
133 133 133 133 133 133 116 116 116 116 335 335 335 335 335 335 335 335 335 335 335 335 320 320 320 320 320 320 320 320 320 320 320 47 47 47 47 47 47 233 233 233 116 116 116 335 335 335 69 69 69 69 69 69 69 69 276 276 276 276 276 279 279 279 279 279 279 279 279 279 279 279 193 193 193 193 193 281 281 281 281 281 281 281 281 281 289 289 289 289 289 289 144 144 144 144 144 144 144 331 331 331 331 331 331 331 331 331 331 144 144 144 144 144 144 47 47 47 47 47 117 117 117 117 117 117 117 21 21 21 21 21 21 21 21 21 21 21 273 273 273 273 289 289 289 289 189 189 189 189 236 236 236 236 236 236 179 179 179 179 179 179 148 148 148 148 148 148 148 148 148 148 148 171 171 171 171 171 171 171 145 145 145 145 228 228 228 228 228 47 47 47 232 232 232 232 232 232 232 67 67 67 67 67 67 67 67 67 277 277 277 277 277 173 173 173 173 173 173 173 173 49 49 49 232 232 232 232 47 47 47 47 47 47 281 281 281 281 281 281 281 281 281 281 101 101 101 101 101 101 101 101 101 101 225 225 225 225 225 225 225 225 49 49 49 49 228 228 228 228 228 228 228 228 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 119 119 119 49 49 49 49 288 288 288 288 288 288 335 335 335 335 335 335 320 320 320 320 320 320 331 331 331 331 331 305 305 305 305 116 116 116 107 107 107 107 107 204 204 204 204 204 204 204 47 47 47 47 47 47 225 225 225 225 225 225 225 225 69 69 69 69 69 69 69 236 236 236 236 236 236 236 236 171 171 171 171 171 171 171 144 144 144 179 179 179 179 179 179 148 148 148 148 148 148 148 148 271 271 271 271 271 271 271 271 271 277 277 277 277 133 133 133 133 341 341 341 341 341 341 341 49 49 49 49 233 233 233 233 233 289 289 289 289 289 289 225 225 225 225 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 119 119 119 37 37 37 37 37 37 37 37 37 37 289 289 289 289 289 289 280 280 280 280 280 67 67 67 67 67 67 67 67 67 67 67 224 224 224 224 224 224 224 224 102 102 102 102 102 102 102 102 102 102 102 231 231 231 231 231 231 231 248 248 248 248 248 248 248 248 248 248 248 47 47 47 47 47 109 109 109 109 109 85 85 85 85 85 85 85 85 85 85 85 288 288 288 288 288 187 187 187 187 187 187 187 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 102 102 102 102 102 102 102 102 102 102 102 102 102 102 179 179 179 179 179 179 179 37 37 37 37 37 37 329 329 329 329 49 49 49 49 232 232 232 232 175 175 175 175 175 175 175 21 21 21 21 21 21 288 288 288 288 288 131 131 131 233 233 233 233 233 233 204 204 204 204 204 227 227 227 227 227 227 227 227 227 69 69 69 69 69 69 69 276 276 276 276 276 276 276 67 67 67 67 67 67 67 67 67 277 277 277 277 277 173 173 173 173 173 173 173 173 49 49 49 49 49 233 233 233 233 233 233 233 340 340 340 340 340 219 219 219 219 219 49 49 49 233 233 233 233 233 233 233 281 281 281 281 281 281 281 281 281 281 209 209 209 209 209 209 209 209 209 225 225 225 225 225 225 225 225 225 116 116 116 116 116 179 179 179 193 193 193 193 277 277 277 277 277 277 277 277 277 49 49 49 49 109 109 109 109 89 89 89 89 89 89 89 89 89 89 89 89 89 89 89 89 289 289 289 289 289 289 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 115 115 115 
115 115 115 115 249 249 249 249 249 249 249 249 249 233 233 233 233 288 288 288 288 55 55 55 55 55 55 233 233 233 233 233 117 117 117 117 145 145 145 145 145 145 281 281 281 281 281 281 281 281 281 281 281 281 289 289 289 289 289 289 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 233 233 233 233 233 116 116 116 116 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 279 279 133 133 133 133 116 116 116 116 116 116 116 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 337 337 337 337 316 316 316 316 316 316 316 179 179 179 179 179 179 179 179 179 179 133 133 133 133 133 225 225 225 225 225 225 273 273 273 273 273 273 273 225 225 225 225 49 49 49 49 49 49 281 281 281 281 281 281 281 281 225 225 225 225 225 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 193 193 193 193 193 285 285 285 285 285 285 285 285 285 285 189 189 189 189 189 236 236 236 236 236 236 236 236 236 236 119 119 119 119 49 49 49 49 49 288 288 288 288 288 288 288 227 227 227 227 227 17 17 17 277 277 277 277 277 277 277 277 193 193 193 193 193 193 225 225 225 225 225 225 225 225 48 48 48 48 48 331 331 331 331 331 331 331 49 49 49 49 49 340 340 340 340 340 35 35 35 35 35 288 288 288 288 288 179 179 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 37 37 37 233 233 233 233 233 233 233 116 116 116 287 287 287 287 287 188 188 188 219 219 219 219 219 219 219 219 219 219 219 219 249 249 249 249 249 249 249 249 249 272 272 272 272 272 272 272 272 272 331 331 331 189 189 189 120 120 120 119 119 119 52 52 52 279 279 279 279 279 279 279 279 197 197 197 197 197 113 113 113 113 113 113 113 113 317 317 317 317 317 317 317 317 317 165 165 165 165 165 165 165 165 285 285 285 285 285 285 285 285 285 285 285 49 49 49 49 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 331 331 331 133 133 133 133 133 133 133 133 133 133 133 133 224 224 224 224 224 224 224 224 335 335 335 335 335 335 335 335 335 321 321 321 321 116 116 116 116 116 116 107 107 107 107 107 107 133 133 133 133 289 289 289 289 289 144 144 144 144 144 144 219 219 219 219 219 219 333 333 333 333 133 133 133 133 281 281 281 281 281 113 113 113 113 113 113 113 113 49 49 49 232 232 232 119 119 119 52 52 52 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 149 149 149 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 279 279 279 133 133 133 133 133 116 116 116 116 119 119 119 52 52 52 52 52 52 279 279 279 279 279 279 289 289 289 289 289 289 165 165 165 165 165 165 285 285 285 285 285 285 285 285 285 285 285 49 49 49 232 232 232 227 227 227 227 227 227 227 37 37 37 37 37 37 37 281 281 281 281 281 289 289 289 289 289 289 144 144 144 219 219 219 219 219 219 219 219 219 219 133 133 133 133 133 277 277 277 277 277 277 277 277 225 225 225 225 49 49 49 49 49 49 281 281 281 281 281 281 281 281 225 225 225 225 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 115 115 115 115 115 115 115 133 133 133 133 133 133 133 
276 276 276 276 276 276 279 279 279 279 279 279 279 279 279 279 164 164 164 164 164 164 164 164 164 164 164 283 283 283 283 283 283 283 283 283 209 209 209 209 224 224 224 224 224 224 107 107 107 107 107 107 204 204 204 204 204 204 204 204 163 163 163 163 163 163 109 109 109 109 49 49 49 224 224 224 287 287 287 287 287 287 320 320 320 320 320 187 187 187 187 187 221 221 221 221 221 221 281 281 281 281 281 281 273 273 273 273 273 225 225 225 225 225 225 165 165 165 165 165 165 165 165 165 165 165 165 165 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 209 209 209 209 209 340 340 340 340 340 340 340 340 175 175 175 175 175 175 175 21 21 21 288 288 288 288 288 50 50 50 50 50 50 287 287 287 287 287 287 287 287 287 287 287 53 53 53 53 53 53 236 236 236 236 236 236 236 47 47 47 328 328 328 328 328 179 179 179 179 179 144 144 144 144 144 144 144 144 144 144 144 247 247 247 247 247 247 247 247 247 247 247 247 247 247 247 247 232 232 232 232 232 232 119 119 119 119 119 37 37 37 37 37 37 37 37 37 37 289 289 289 289 289 289 289 280 280 280 279 279 279 279 279 279 279 279 279 149 149 149 149 149 149 149 149 149 149 149 289 289 289 289 49 49 49 232 232 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 165 165 165 165 165 165 165 165 109 109 109 204 204 204 204 204 204 204 119 119 119 119 119 164 164 164 164 164 164 164 164 331 331 331 331 331 331 331 331 331 331 144 144 144 144 144 83 83 83 83 83 83 83 83 288 288 288 288 288 47 47 47 328 328 328 328 328 328 328 328 328 107 107 107 107 107 107 107 265 265 265 265 265 265 265 265 265 265 265 265 265 265 265 265 340 340 340 340 340 340 340 47 47 47 47 328 328 328 119 119 119 119 52 52 52 107 107 107 107 107 277 277 277 277 277 277 37 37 37 37 37 37 37 37 37 233 233 233 233 116 116 116 335 335 335 335 335 320 320 320 320 320 320 320 331 331 331 331 331 331 331 331 331 21 21 21 21 233 233 233 233 233 289 289 289 289 289 49 49 49 49 49 49 49 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 208 208 208 208 208 208 208 331 331 331 331 331 331 331 331 331 331 69 69 69 69 69 69 69 69 69 69 69 221 221 221 221 221 288 288 288 288 288 288 288 288 288 1 1 1 215 215 215 215 215 215 215 215 215 215 69 69 69 69 69 69 69 69 233 233 233 233 233 289 289 289 289 289 49 49 49 225 225 225 225 225 204 204 204 204 204 204 204 204 47 47 47 47 47 47 47 333 333 333 333 333 333 333 333 333 333 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 107 107 209 209 209 209 209 209 209 209 189 189 189 189 189 189 189 236 236 236 236 236 236 236 236 179 179 179 179 179 179 179 179 179 53 53 53 53 237 237 237 237 237 237 237 237 237 177 177 177 177 277 277 277 277 277 277 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 233 116 116 116 116 119 119 119 204 204 204 204 204 204 204 47 47 47 47 233 233 233 233 233 233 173 173 173 173 173 173 173 173 173 69 69 69 69 277 277 277 277 277 277 113 113 113 113 113 113 49 49 49 233 233 233 49 49 49 49 49 49 288 288 288 288 288 227 227 227 227 227 227 227 37 37 37 37 37 37 37 
37 37 37 37 37 293 293 293 293 293 293 293 293 337 337 337 337 337 337 316 316 316 316 316 316 316 331 331 331 331 331 331 331 49 49 49 49 340 340 340 340 340 340 223 223 223 223 223 223 223 133 133 133 133 133 133 133 173 173 173 173 173 288 288 288 287 287 287 287 188 188 188 188 188 115 115 115 115 115 115 115 115 115 115 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 1 1 1 119 119 119 119 119 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 331 331 331 331 193 193 193 193 112 112 112 112 112 112 112 112 112 112 331 331 331 49 49 49 340 340 340 340 340 340 340 179 179 179 179 179 179 179 179 179 179 21 21 21 21 21 21 277 277 277 277 277 117 117 117 144 144 144 144 144 144 171 171 171 171 171 171 171 144 144 144 144 179 179 179 179 179 179 179 179 179 179 179 179 179 193 193 193 193 193 193 228 228 228 228 228 228 228 228 228 228 228 228 228 119 119 119 119 119 49 49 49 49 49 49 49 232 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 107 107 107 107 107 107 193 193 193 193 193 193 193 193 277 277 277 277 277 277 277 117 117 117 117 189 189 189 189 236 236 236 236 236 236 236 236 236 50 50 50 50 50 50 223 223 223 223 223 223 223 223 223 223 223 223 223 223 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 49 49 49 232 232 232 232 187 187 187 187 232 232 232 187 187 187 187 187 187 187 289 289 289 289 289 289 280 280 280 280 280 280 280 280 280 115 115 115 115 115 115 115 115 133 133 133 133 133 133 133 133 133 133 133 133 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 69 69 69 69 69 69 69 69 69 69 220 220 220 220 220 220 220 220 220 220 220 51 51 51 51 51 51 51 51 51 272 272 272 272 272 272 272 287 287 287 287 287 287 287 287 287 320 320 320 320 320 320 320 320 50 50 50 50 50 50 175 175 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 50 50 50 50 50 50 50 50 50 50 50 50 50 279 279 279 279 279 279 279 279 279 279 279 289 289 289 289 289 289 289 289 277 277 277 277 165 165 165 165 165 165 165 165 165 165 165 233 233 233 233 216 216 216 216 216 216 216 175 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 149 149 149 149 224 224 224 224 224 224 224 224 224 224 1 1 1 47 47 47 47 47 47 47 232 232 232 232 232 232 232 232 232 232 232 232 67 67 67 67 67 67 67 67 67 67 277 277 277 277 277 277 173 173 173 173 173 173 173 173 173 173 49 49 49 49 232 232 232 232 232 232 232 232 232 232 232 175 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 149 149 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 233 233 116 116 116 115 115 115 189 189 189 189 189 229 229 229 229 229 229 229 229 37 37 37 37 37 37 37 37 37 37 37 233 233 233 233 233 116 116 116 116 47 47 47 47 328 328 328 328 328 179 179 179 179 179 179 179 179 144 144 144 144 144 144 144 144 144 144 144 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 100 100 100 100 100 100 100 100 100 100 100 100 283 283 283 283 283 283 283 283 283 283 208 208 208 208 
208 331 331 331 331 331 331 331 53 53 53 53 53 53 341 341 341 341 49 49 49 49 233 233 233 288 288 288 50 50 50 50 50 107 107 107 107 107 107 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 264 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 293 293 293 293 293 293 337 337 337 337 337 337 316 316 316 316 316 316 316 316 316 316 316 175 175 175 175 175 175 175 175 175 277 277 277 277 277 277 277 277 277 277 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 233 233 233 233 233 116 116 116 116 116 116 187 187 187 187 232 232 232 232 232 232 232 232 232 232 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 273 273 273 273 273 193 193 193 277 277 277 277 277 277 277 277 277 277 277 277 277 277 277 189 189 189 189 189 288 288 288 288 131 131 131 131 131 131 340 340 340 340 179 179 179 179 208 208 208 208 208 208 287 287 287 287 287 287 287 287 287 287 287 149 149 149 149 149 149 149 149 149 149 149 149 149 149 233 233 233 233 116 116 116 116 47 47 47 109 109 109 109 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 288 288 288 288 288 288 288 288 47 47 47 233 233 233 233 233 116 116 116 116 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 53 53 53 53 53 53 53 173 173 173 173 173 173 173 49 49 49 49 225 225 225 225 225 225 225 225 225 225 225 116 116 116 1 1 1 1 1 215 215 215 215 215 215 215 215 215 215 215 215 133 133 133 133 133 133 233 233 233 233 233 233 233 289 289 289 289 289 289 225 225 225 225 204 204 204 204 204 204 204 204 115 115 115 115 115 115 115 85 85 85 85 85 85 85 85 85 85 85 232 232 232 232 232 119 119 119 119 52 52 52 271 271 271 271 271 271 271 271 225 225 225 225 225 225 37 37 37 37 37 37 37 289 289 289 289 289 289 289 173 173 173 173 173 173 173 73 73 73 73 277 277 277 277 277 228 228 228 228 228 228 228 287 287 287 287 287 287 287 287 69 69 69 69 69 69 69 69 277 277 277 277 277 117 117 117 117 117 117 340 340 340 340 340 179 179 179 179 179 144 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 208 179 179 179 179 179 179 37 37 37 37 37 116 116 116 107 107 107 107 107 193 193 193 193 193 232 232 232 232 232 232 331 331 331 331 331 331 21 21 21 21 21 21 21 21 21 113 113 113 113 113 113 113 189 189 189 189 189 236 236 236 236 236 236 179 179 179 179 179 179 179 193 193 193 193 228 228 228 228 228 131 131 131 131 131 131 131 131 131 131 131 131 329 329 329 329 329 329 329 144 144 144 144 144 144 144 279 279 279 279 279 279 279 279 279 279 279 193 193 193 193 233 233 233 233 233 233 233 233 280 280 280 280 280 179 179 179 179 179 208 208 208 179 179 179 37 37 37 116 116 116 116 116 271 271 271 271 271 271 271 271 271 271 271 271 37 37 37 37 37 37 37 37 37 37 37 281 281 281 281 281 281 281 281 281 288 288 288 288 179 179 179 144 144 144 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 283 283 283 283 283 283 208 208 208 208 208 208 208 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 116 116 116 179 179 179 179 
179 179 179 179 148 148 148 148 148 148 148 148 148 148 148 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 340 340 340 340 340 340 340 340 340 19 19 19 19 19 19 19 232 232 232 232 232 179 179 179 179 179 179 193 193 193 193 193 228 228 228 228 228 228 231 231 231 231 231 231 231 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 293 337 337 337 337 316 316 316 316 316 331 331 331 331 331 49 49 49 49 49 340 340 340 340 340 340 340 340 340 231 231 231 231 231 21 21 21 21 21 21 21 21 288 288 288 288 288 288 288 223 223 223 223 223 223 223 305 305 305 305 221 221 221 221 221 221 221 189 189 189 189 236 236 236 236 236 236 236 236 236 236 35 35 35 35 35 35 35 35 35 288 288 288 288 179 179 179 179 144 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 50 50 50 50 50 50 50 50 50 50 111 111 111 111 111 111 111 111 111 111 111 111 111 101 101 101 101 101 101 101 101 101 101 101 101 101 101 225 225 225 225 225 225 116 116 116 47 47 47 47 328 328 328 328 328 47 47 47 47 109 109 109 109 109 109 109 85 85 85 85 85 85 288 288 288 288 288 187 187 187 225 225 225 225 225 225 225 225 225 225 225 225 133 133 133 133 329 329 329 329 329 329 329 329 49 49 49 49 49 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 175 175 175 175 175 175 175 175 21 21 21 21 21 21 21 21 21 21 21 277 277 277 277 277 277 277 109 109 109 116 116 116 116 116 116 116 187 187 187 232 232 232 232 232 50 50 50 50 327 327 327 327 327 327 133 133 133 133 133 133 277 277 277 277 277 277 277 277 277 204 204 204 204 204 204 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 69 69 69 69 69 69 69 69 69 277 277 277 277 277 277 277 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 327 327 327 327 327 327 327 327 327 327 327 133 133 133 133 133 277 277 277 277 277 277 277 277 277 204 204 204 204 204 204 204 287 287 287 287 287 287 287 287 287 287 287 287 287 287 287 101 101 101 101 101 101 101 101 101 101 101 101 101 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 327 327 327 327 327 327 327 327 133 133 133 133 133 133 133 277 277 277 277 277 277 277 204 204 204 204 204 204 204 204 204 204 204 204 204 204 51 51 51 51 51 51 51 51 51 51 51 177 177 177 177 177 177 177 225 225 225 225 225 225 225 225 204 204 204 204 204 204 204 204 115 115 115 115 115 115 115 277 277 277 277 277 133 133 133 133 133 133 133 280 280 280 280 280 280 280 280 280 280 280 280 280 47 47 47 47 328 328 328 328 328 328 328 335 335 335 335 335 335 335 335 335 133 133 133 133 133 133 225 225 225 225 225 225 245 245 245 245 245 245 245 245 245 245 245 189 189 189 189 189 284 284 284 284 284 284 284 284 284 284 284 284 284 175 175 175 175 175 175 277 277 277 277 277 277 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 331 331 331 331 331 331 331 331 331 331 331 193 193 193 233 233 233 233 233 233 233 233 233 281 281 281 281 281 281 281 281 281 281 281 281 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 331 
331 331 331 331 331 331 331 331 331 69 69 69 69 69 69 276 276 276 276 276 276 50 50 50 50 50 50 171 171 171 171 171 171 171 171 171 171 171 171 171 165 165 165 165 165 165 165 165 165 165 165 165 165 117 117 117 117 117 189 189 189 189 189 189 116 116 116 116 116 116 116 107 107 107 107 107 107 107 107 107 277 277 277 277 277 277 277 85 85 85 85 85 85 85 85 85 85 85 85 85 85 232 232 232 232 232 232 232 279 279 279 279 279 279 279 279 279 279 279 279 165 165 165 165 165 165 165 165 165 225 225 225 225 225 225 225 225 144 144 144 144 144 144 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 47 47 47 47 47 47 47 47 47 47 47 233 233 233 233 116 116 116 107 107 107 107 107 107 189 189 189 189 233 233 233 233 233 233 209 209 209 209 209 209 209 209 209 209 209 292 292 292 292 292 292 292 292 119 119 119 52 52 52 179 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 221 221 221 221 221 221 221 281 281 281 281 281 281 281 289 289 289 289 289 289 289 133 133 133 133 133 133 133 233 233 233 233 233 233 117 117 117 117 117 189 189 189 236 236 236 236 236 236 236 236 236 115 115 115 115 115 115 85 85 85 85 85 85 85 85 85 85 85 232 232 232 232 232 179 179 179 179 179 179 179 179 179 179 144 144 144 144 144 107 107 107 107 107 107 107 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 331 331 331 331 144 144 144 144 144 144 144 287 287 287 287 287 287 287 287 287 287 287 287 287 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 107 107 107 107 107 107 107 277 277 277 277 277 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 117 117 117 117 117 117 340 340 340 340 340 340 47 47 47 47 47 328 328 328 327 327 327 327 327 327 327 133 133 133 133 133 277 277 277 277 277 277 277 277 204 204 204 204 204 204 204 204 204 291 291 291 291 291 291 291 291 291 291 291 291 291 291 291 193 193 193 193 193 193 193 193 193 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 220 1 1 1 1 1 1 1 1 1 1 115 115 115 115 115 115 197 197 197 197 281 281 281 281 281 281 281 281 281 281 281 281 281 281 281 281 281 101 101 101 101 101 101 101 101 101 101 101 117 117 117 117 117 49 49 49 49 49 117 117 117 117 117 117 225 225 225 225 225 204 204 204 204 204 204 204 204 204 204 275 275 275 275 275 275 275 275 275 275 275 275 275 275 133 133 133 133 133 133 133 133 133 133 133 133 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 179 179 179 179 133 133 133 133 133 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 144 144 144 144 144 144 171 171 171 171 171 171 171 171 171 165 165 165 165 165 165 165 165 165 165 165 165 280 280 280 280 280 280 280 280 280 280 280 280 331 331 331 331 331 49 49 49 340 340 340 340 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 229 229 229 229 229 229 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 
288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 47 47 47 233 233 233 233 233 233 233 116 116 116 116 116 116 291 291 291 291 291 291 291 291 291 291 291 291 291 193 193 193 193 193 193 193 232 232 232 232 232 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 67 67 67 67 67 67 67 67 67 67 67 67 67 67 225 225 225 225 225 225 225 281 281 281 281 281 281 281 281 281 281 281 244 244 244 244 244 244 244 244 244 244 227 227 227 227 227 227 227 227 227 53 53 53 53 53 53 53 112 112 112 112 112 112 112 112 112 171 171 171 171 171 171 171 171 171 171 277 277 277 133 133 133 133 221 221 221 221 221 221 221 221 49 49 49 49 49 225 225 225 225 225 225 225 225 225 225 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 179 179 144 144 144 227 227 227 227 227 227 227 227 227 227 227 227 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 292 292 292 292 292 292 292 292 292 292 292 292 292 292 292 292 292 292 331 331 331 49 49 49 49 340 340 340 340 340 340 340 223 223 223 223 223 223 223 223 223 21 21 21 21 21 21 21 21 21 21 21 21 21 21 277 277 277 277 277 277 277 277 277 277 216 216 216 216 216 216 216 216 216 216 216 216 216 216 216 216 216 216 216 1 1 1 47 47 47 47 47 47 47 47 233 233 233 116 116 116 116 279 279 279 279 279 279 279 279 279 248 248 248 248 248 248 248 248 248 331 331 331 331 331 331 331 148 148 148 179 179 179 179 179 179 179 179 144 144 144 144 144 144 144 144 144 144 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 331 331 331 331 193 193 193 193 193 112 112 112 112 112 112 112 112 112 112 223 223 223 223 223 223 223 305 305 305 305 305 221 221 221 221 221 221 288 288 288 288 288 288 288 288 288 288 288 288 175 175 175 175 175 175 277 277 277 277 277 277 209 209 209 209 209 209 209 209 209 209 209 209 209 232 232 232 232 232 187 187 187 187 232 232 232 232 232 232 232 232 279 279 279 279 279 279 279 279 279 279 279 53 53 53 53 53 53 228 228 228 228 228 228 228 228 223 223 223 223 223 223 223 101 101 101 101 101 101 101 101 101 101 101 101 289 289 289 289 289 289 280 280 280 280 47 47 47 233 233 233 116 116 116 227 227 227 227 227 227 227 321 321 321 321 321 321 321 321 321 321 321 321 321 321 117 117 117 117 117 117 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 233 233 233 233 233 116 116 116 116 116 116 175 175 175 175 175 175 277 277 277 277 277 277 277 277 277 164 164 164 164 164 164 164 164 164 164 164 164 191 191 191 191 191 191 191 232 232 232 232 232 232 51 51 51 51 51 51 51 51 121 121 121 121 121 121 121 121 145 145 145 145 145 145 145 145 145 145 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 279 279 279 279 248 248 248 248 248 248 248 248 248 248 248 248 248 171 171 171 171 171 171 171 171 21 21 21 21 21 21 21 276 276 276 276 276 276 276 119 119 119 119 204 204 204 204 204 204 204 67 67 67 67 67 67 67 67 67 67 277 277 277 277 277 117 117 117 49 49 49 233 233 233 137 137 137 277 277 277 277 277 204 204 204 204 204 204 204 47 47 47 47 47 109 109 109 109 109 109 341 341 341 341 341 341 341 341 149 149 149 149 149 149 149 149 329 329 
329 329 329 144 144 144 144 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 232 232 232 232 187 187 187 187 187 221 221 221 221 221 281 281 281 281 289 289 289 289 289 289 289 277 277 277 277 277 69 69 69 69 69 277 277 277 277 277 117 117 117 49 49 49 233 233 233 137 137 137 137 137 137 137 277 277 277 277 277 277 277 204 204 204 204 204 204 204 47 47 47 47 109 109 109 109 109 109 341 341 341 341 341 341 341 341 149 149 149 149 149 149 149 149 149 149 329 329 329 329 329 329 329 144 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 227 101 101 101 101 101 101 101 101 101 288 288 288 288 179 179 179 179 37 37 37 328 328 328 328 279 279 279 279 279 279 279 279 279 279 279 279 209 209 209 209 209 209 209 209 209 209 232 232 232 232 232 232 119 119 119 119 49 49 49 288 288 288 288 119 119 119 52 52 52 52 52 52 52 111 111 111 111 111 111 111 111 111 111 111 111 111 111 193 193 193 193 193 232 232 232 232 232 232 232 232 232 331 331 331 49 49 49 49 340 340 340 340 340 340 340 340 340 340 327 327 327 327 327 327 133 133 133 133 277 277 277 277 277 277 277 204 204 204 204 204 204 204 204 204 271 271 271 271 271 271 271 271 271 271 271 271 271 271 271 271 271 271 271 265 265 265 265 265 265 265 265 265 233 233 233 233 233 289 289 289 289 289 289 289 289 289 189 189 189 189 116 116 116 116 116 47 47 47 233 233 233 233 116 116 116 116 271 271 271 271 271 277 277 277 277 49 49 49 49 233 233 233 233 233 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 233 233 233 233 233 233 281 281 281 281 281 281 281 281 281 281 281 281 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 119 49 49 49 49 49 288 288 288 288 288 119 119 119 119 48 48 48 48 48 107 107 107 107 107 107 107 107 107 193 193 193 193 193 193 193 176 176 176 176 176 176 176 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 340 340 340 340 340 340 340 340 331 331 331 331 331 144 144 144 144 144 171 171 171 171 171 171 171 171 171 171 171 305 305 305 305 305 224 224 224 224 224 224 224 47 47 47 47 328 328 328 328 328 328 328 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 273 273 273 273 273 193 193 193 277 277 277 277 277 277 277 277 277 277 189 189 189 189 189 288 288 288 288 288 47 47 47 233 233 233 233 116 116 116 327 327 327 189 189 189 189 189 189 189 189 189 189 189 189 329 329 329 329 329 329 37 37 37 37 37 37 37 37 37 37 281 281 281 281 281 281 281 281 281 281 189 189 189 189 289 289 289 289 289 289 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 119 49 49 49 49 288 288 288 288 119 119 119 52 52 52 52 227 227 227 227 227 227 227 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 292 292 292 292 292 292 292 292 292 292 331 331 331 331 49 49 49 49 340 340 340 340 340 279 279 279 279 279 279 279 279 279 279 279 279 279 279 333 333 333 333 333 209 209 209 209 209 209 209 209 209 288 288 288 288 288 288 223 223 223 223 223 223 223 223 193 193 193 193 193 193 193 193 273 273 273 273 273 273 273 273 288 288 288 288 288 288 47 47 47 233 233 233 116 116 116 187 187 187 187 221 221 221 221 221 221 221 281 281 281 281 281 281 281 273 273 273 273 273 277 277 277 133 133 133 133 133 133 281 281 281 281 281 281 281 281 281 281 281 281 189 189 189 189 189 189 
189 189 189 189 189 328 328 328 328 328 328 328 328 328 328 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 49 49 49 49 288 288 288 288 288 288 119 119 119 52 52 52 52 52 171 171 171 171 171 171 171 171 171 171 171 171 171 171 69 69 69 69 69 69 69 69 69 277 277 277 277 277 181 181 181 181 181 181 181 181 181 129 129 129 129 129 129 129 129 129 129 129 116 116 116 116 116 116 116 116 116 116 331 331 331 331 331 331 49 49 49 49 49 340 340 340 340 340 340 340 340 340 340 107 107 107 107 107 107 107 107 277 277 277 277 277 277 277 277 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 116 116 116 116 116 47 47 47 47 233 233 233 233 116 116 116 116 116 116 171 171 171 171 171 171 171 171 171 171 171 171 305 305 305 305 305 305 305 305 305 305 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 187 187 187 187 187 232 232 232 232 232 283 283 283 283 283 283 283 283 283 283 283 283 283 283 69 69 69 69 69 69 69 69 69 69 277 277 277 277 277 277 277 277 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 19 19 19 19 19 19 19 19 19 19 19 276 276 276 276 276 276 276 276 115 115 115 115 115 115 189 189 189 189 189 281 281 281 281 281 281 281 281 281 281 281 281 149 149 149 149 149 149 149 149 149 149 233 233 233 233 189 189 189 189 189 189 236 236 236 236 236 236 236 236 187 187 187 187 187 221 221 221 221 221 221 221 221 221 221 281 281 281 289 289 289 289 289 277 277 277 277 277 69 69 69 277 277 277 277 277 277 117 117 117 49 49 49 233 233 233 137 137 137 137 137 137 277 277 277 204 204 204 47 47 47 47 109 109 109 109 109 341 341 341 341 341 341 341 341 341 149 149 149 149 149 149 149 149 149 149 149 329 329 329 329 329 329 144 144 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 227 227 101 101 101 101 101 101 101 101 288 288 288 179 179 179 37 37 37 328 328 328 328 328 328 219 219 219 219 49 49 49 49 49 49 233 233 233 233 233 221 221 221 221 221 221 221 221 221 221 221 225 225 225 225 321 321 321 321 321 321 321 321 117 117 117 117 117 189 189 189 189 189 189 189 189 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 49 49 49 49 340 340 340 340 340 340 279 279 279 279 279 279 279 279 279 248 248 248 248 248 248 248 223 223 223 223 223 223 223 223 321 321 321 321 321 321 321 321 321 117 117 117 117 49 49 49 49 221 221 221 221 221 277 277 277 277 277 49 49 49 49 281 281 281 281 281 281 281 281 281 225 225 225 225 225 204 204 204 204 204 204 204 47 47 47 47 47 173 173 173 173 173 173 173 173 173 173 277 277 277 277 277 165 165 165 165 165 165 165 165 165 165 165 165 165 165 165 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 293 293 293 293 337 337 337 337 337 337 316 316 316 316 316 316 316 316 179 179 179 179 179 179 179 179 179 179 89 89 89 89 89 89 89 89 89 133 133 133 133 133 133 133 133 133 329 329 329 329 329 329 329 144 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 49 49 49 340 340 
340 340 340 340 340 279 279 279 279 279 279 279 273 273 273 273 273 273 133 133 133 133 133 133 133 277 277 277 277 277 277 277 277 277 277 277 277 116 116 116 119 119 119 119 119 204 204 204 204 204 63 63 63 63 63 63 63 63 277 277 277 277 277 277 277 277 117 117 117 117 117 117 117 117 209 209 209 209 209 209 209 209 209 209 209 224 224 224 224 224 224 224 224 47 47 47 47 328 328 328 328 328 279 279 279 279 279 279 279 279 273 273 273 273 273 273 273 273 209 209 209 209 209 209 221 221 221 221 221 189 189 189 189 189 236 236 236 236 236 236 236 236 171 171 171 171 171 171 171 171 171 171 171 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 281 281 281 281 281 281 281 281 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 171 171 171 171 171 171 171 171 171 171 171 171 171 69 69 69 69 69 276 276 276 276 276 276 276 131 131 131 131 340 340 340 340 340 340 279 279 279 279 279 279 279 279 321 321 321 321 321 321 321 232 232 232 232 232 131 131 131 340 340 340 340 283 283 283 283 283 283 283 208 208 208 208 208 208 219 219 219 219 219 219 49 49 49 49 233 233 233 233 233 221 221 221 221 221 221 221 221 221 225 225 225 225 225 321 321 321 321 321 321 117 117 117 117 117 189 189 189 189 189 116 116 116 116 116 119 119 119 49 49 49 288 288 288 179 179 179 179 179 179 208 208 208 208 208 208 331 331 331 331 331 49 49 49 340 340 340 340 340 340 340 219 219 219 219 219 219 219 219 219 219 53 53 53 53 53 229 229 229 229 229 189 189 189 189 236 236 236 236 236 236 236 287 287 287 287 287 287 287 287 287 287 320 320 320 320 320 320 179 179 179 179 179 179 179 148 148 148 148 148 148 148 148 148 148 148 148 148 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 279 279 279 279 279 279 279 279 279 279 279 279 279 289 289 289 289 289 305 305 305 305 305 116 116 116 116 116 51 51 51 51 51 51 51 51 51 51 51 272 272 272 272 272 272 272 272 272 272 272 272 272 272 272 1 1 1 1 1 1 1 1 1 1 1 1 1 175 175 175 175 175 277 277 277 277 37 37 37 37 37 37 37 37 37 37 281 281 281 281 281 281 281 281 273 273 273 273 273 273 189 189 189 189 189 236 236 236 236 236 236 236 331 331 331 331 189 189 189 189 292 292 292 292 292 292 292 292 331 331 331 331 331 331 331 331 53 53 53 53 53 53 53 53 232 232 232 232 232 232 232 232 232 232 232 291 291 291 291 291 291 291 291 291 291 291 291 291 291 291 291 291 291 291 291 193 193 193 193 193 193 193 193 193 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 232 107 107 107 107 107 107 277 277 277 277 277 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 232 232 232 232 232 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 233 233 233 233 233 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 52 52 52 52 179 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 233 233 233 233 233 233 233 233 233 117 117 117 117 49 49 49 49 49 224 224 224 224 224 47 47 47 47 328 328 328 328 328 50 50 50 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 283 37 37 37 37 37 37 37 37 37 109 109 109 109 109 109 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 247 247 247 247 247 247 225 225 225 225 225 225 225 225 225 225 225 116 116 116 116 116 116 171 171 171 171 171 171 171 171 171 171 171 171 171 37 37 37 37 37 37 37 37 37 37 285 285 285 285 285 285 285 285 285 285 49 49 49 49 233 233 233 233 233 116 116 116 116 116 116 219 219 219 
219 219 219 219 219 219 219 219 21 21 21 21 21 277 277 277 277 273 273 273 273 273 49 49 49 49 49 49 288 288 288 288 107 107 107 107 107 107 107 107 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 176 176 176 176 176 176 176 176 176 176 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 204 204 204 204 204 204 204 204 51 51 51 51 51 51 51 51 51 51 51 51 121 121 121 121 121 121 121 121 144 144 144 144 144 144 283 283 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 208 179 179 179 179 179 179 179 133 133 133 133 133 133 225 225 225 225 225 225 225 225 116 116 116 116 83 83 83 83 83 83 83 83 83 83 83 83 288 288 288 287 287 287 287 287 188 188 188 188 179 179 179 179 179 179 179 179 179 179 193 193 193 193 193 193 228 228 228 228 228 228 228 228 228 228 228 228 228 228 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 279 279 279 279 279 279 279 49 49 49 49 273 273 273 273 273 273 273 249 249 249 249 249 249 249 249 249 249 249 249 340 340 340 340 340 340 335 335 335 320 320 320 320 320 320 320 19 19 19 276 276 276 276 227 227 227 227 193 193 193 193 193 281 281 281 281 281 281 289 289 289 289 289 144 144 144 144 144 227 227 227 227 227 227 227 37 37 37 37 37 37 293 293 293 293 293 337 337 337 337 316 316 316 316 316 219 219 219 219 219 219 219 219 219 219 219 53 53 53 53 53 53 293 293 293 293 293 293 293 293 109 109 109 109 109 109 109 145 145 145 145 145 145 288 288 288 288 47 47 47 328 328 328 328 328 175 175 175 175 175 175 277 277 277 209 209 209 209 209 209 232 232 232 232 232 232 175 175 175 175 175 165 165 165 165 165 165 165 165 165 165 109 109 109 109 109 49 49 49 49 225 225 225 225 225 225 225 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 279 279 279 279 279 279 279 279 279 133 133 133 133 133 133 133 116 116 116 116 116 187 187 187 232 232 232 232 50 50 50 50 50 271 271 271 271 271 271 189 189 189 189 221 221 221 221 221 221 221 221 221 221 221 221 221 221 337 337 337 337 337 337 321 321 321 321 321 321 225 225 225 225 337 337 337 337 337 337 337 145 145 145 145 145 225 225 225 225 225 225 204 204 204 204 204 204 204 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 225 225 225 225 225 193 193 193 193 193 193 193 193 193 193 193 193 193 193 276 276 276 276 276 276 276 276 276 276 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 333 333 333 333 333 209 209 209 209 209 209 209 288 288 288 288 288 288 327 327 327 327 327 327 327 327 265 265 265 265 265 265 265 265 265 265 265 265 265 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 99 99 99 99 99 99 99 99 99 99 99 99 99 99 228 228 228 228 228 228 228 327 327 327 327 327 133 133 133 277 277 277 277 277 277 277 204 204 204 204 204 204 175 175 175 175 175 225 225 225 225 225 225 37 37 37 37 37 37 116 116 116 287 287 287 287 188 188 188 188 279 279 279 279 279 279 279 279 279 279 279 208 208 208 208 208 208 335 335 335 335 335 335 335 335 335 335 335 320 320 320 320 320 320 320 320 320 320 320 320 320 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 331 331 331 331 49 49 49 49 340 340 340 340 340 340 340 
107 107 107 189 189 189 189 189 189 177 177 177 177 177 177 177 193 193 193 193 193 233 233 233 189 189 189 236 236 236 236 236 236 287 287 287 287 188 188 188 188 107 107 107 107 107 107 208 208 208 208 208 208 208 208 208 208 47 47 47 47 47 173 173 173 173 173 173 173 173 173 277 277 277 277 165 165 165 165 165 165 165 165 165 116 116 116 116 335 335 335 335 320 320 320 320 320 320 331 331 331 331 331 331 331 149 149 149 149 149 233 233 233 288 288 288 288 288 219 219 219 219 219 219 219 219 219 53 53 53 53 53 229 229 229 229 229 189 189 189 236 236 236 236 236 236 171 171 171 171 171 171 171 171 171 69 69 69 276 276 276 276 227 227 227 227 227 227 227 208 208 208 208 208 208 208 208 208 208 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 179 179 179 179 179 179 179 37 37 37 116 116 116 116 116 227 227 227 227 227 227 227 165 165 165 165 165 165 116 116 116 116 51 51 51 51 51 51 51 51 272 272 272 272 272 227 227 227 227 100 100 100 100 100 100 100 100 227 227 227 227 227 227 227 227 227 227 227 101 101 101 101 101 101 101 101 101 101 101 101 101 233 233 233 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 49 49 49 288 288 288 288 187 187 187 187 172 172 172 172 172 335 335 335 335 320 320 320 320 320 115 115 115 115 115 115 115 193 193 193 193 193 117 117 117 117 117 117 49 49 49 49 232 232 232 219 219 219 219 219 219 219 219 219 219 53 53 53 53 228 228 228 228 228 228 228 228 171 171 171 171 171 171 144 144 144 144 144 227 227 227 227 227 227 208 208 208 208 208 287 287 287 287 287 287 287 188 188 188 188 231 231 231 231 231 231 231 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 331 331 331 331 331 331 305 305 305 117 117 117 49 49 49 233 233 233 233 288 288 288 107 107 107 107 107 208 208 208 208 208 208 208 50 50 50 50 50 107 107 107 107 107 107 107 193 193 193 288 288 288 288 288 47 47 47 47 47 47 173 173 173 173 173 173 173 173 173 173 173 277 277 277 277 165 165 165 165 165 165 165 165 165 165 165 165 165 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 191 191 191 288 288 288 288 331 331 331 305 305 305 305 116 116 116 107 107 107 107 107 208 208 208 208 208 208 208 208 208 223 223 223 223 223 223 223 223 223 223 223 223 223 223 223 223 223 223 53 53 53 53 53 53 329 329 329 329 329 329 329 329 329 329 329 225 225 225 225 204 204 204 204 204 204 287 287 287 287 287 188 188 188 188 188 279 279 279 279 279 279 279 279 279 225 225 225 225 209 209 209 209 209 209 272 272 272 272 272 272 187 187 187 232 232 232 50 50 50 50 50 50 331 331 331 331 331 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 101 101 101 101 225 225 225 225 225 225 225 225 116 116 116 116 116 116 116 111 111 111 111 111 111 111 111 111 111 111 111 111 111 111 111 111 111 111 133 133 133 133 277 277 277 277 277 277 277 277 277 204 204 204 204 204 204 287 287 287 287 287 287 287 287 277 277 277 277 277 208 208 208 208 208 208 208 208 208 208 208 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 224 224 224 224 224 224 224 224 224 224 331 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 101 101 101 288 288 288 288 288 288 288 288 288 331 331 331 331 189 189 189 292 292 292 292 292 292 292 292 292 107 107 107 
+[data hunk: rows of space-separated integer unit labels (discrete cluster IDs, e.g. "107 107 225 321 ..."), one sequence per added line; original line breaks lost in extraction, content omitted here]
115 277 277 277 277 133 133 133 133 133 133 133 133 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 189 189 189 189 189 221 221 221 221 221 221 49 49 49 49 49 340 340 340 340 340 340 331 331 331 193 193 193 193 193 232 232 232 232 335 335 335 335 335 335 335 320 320 320 320 320 320 320 320 320 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 276 276 276 276 276 276 276 276 187 187 187 187 187 229 229 229 229 229 229 229 229 229 37 37 37 37 37 37 37 37 37 217 217 217 217 217 217 217 217 217 49 49 49 233 233 233 189 189 189 189 189 236 236 236 236 236 236 335 335 335 335 335 335 320 320 320 320 320 227 227 227 227 227 227 101 101 101 101 101 101 101 288 288 288 288 288 288 131 131 131 340 340 340 340 340 340 340 340 340 331 331 331 331 133 133 133 133 224 224 224 224 224 224 187 187 187 187 229 229 229 229 229 229 229 229 37 37 37 37 37 37 37 217 217 217 217 217 217 217 217 217 49 49 49 49 232 232 232 232 232 279 279 279 279 279 279 53 53 53 53 229 229 229 229 293 293 293 293 293 293 293 189 189 189 236 236 236 236 236 236 236 331 331 331 331 331 331 331 331 149 149 149 149 149 149 149 292 292 292 292 292 292 292 292 292 331 331 331 331 331 331 331 331 101 101 101 101 101 101 101 101 101 101 101 101 101 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 50 50 50 50 50 50 50 50 50 50 50 50 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 193 193 193 193 193 193 193 193 193 193 193 176 176 176 176 176 176 176 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 37 37 37 288 288 288 288 288 288 288 288 288 288 1 1 1 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 224 224 224 224 224 171 171 171 171 171 171 171 171 171 171 171 171 171 171 171 171 171 171 225 225 225 225 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 145 145 145 145 145 145 145 145 145 145 340 340 340 340 340 340 340 340 47 47 47 47 47 233 233 233 116 116 116 116 231 231 231 231 231 231 231 231 21 21 21 21 21 21 21 21 21 21 117 117 117 189 189 189 189 236 236 236 236 236 236 236 236 236 236 271 271 271 271 271 271 271 271 271 271 271 271 271 271 271 225 225 225 225 225 321 321 321 321 321 321 321 321 321 321 321 321 229 229 229 229 229 229 229 229 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 47 47 47 47 47 47 47 47 233 233 233 116 116 116 50 50 50 50 50 175 175 175 175 175 175 175 175 175 175 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 225 225 225 225 116 116 116 116 116 116 116 116 331 331 331 331 331 331 331 331 331 21 21 21 21 21 21 21 21 21 21 21 112 112 112 112 112 112 112 112 112 112 112 47 47 47 47 233 233 233 116 116 116 116 116 116 116 116 1 1 1 1 1 219 219 219 219 219 219 219 219 219 219 219 219 193 193 193 193 116 116 116 116 116 116 116 116 175 175 175 175 175 175 225 225 225 225 225 225 225 225 225 53 53 53 53 53 53 53 53 329 329 329 329 329 340 340 340 340 340 340 340 340 47 47 47 47 233 233 233 233 233 116 116 116 116 107 107 107 107 107 107 107 107 107 107 107 107 321 321 321 321 321 321 321 321 321 321 321 321 289 289 289 289 289 289 289 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 171 
171 171 171 171 171 171 171 171 133 133 133 225 225 225 225 225 225 225 225 225 288 288 288 288 111 111 111 111 111 111 111 111 111 111 111 193 193 193 193 277 277 277 277 277 277 116 116 116 116 116 116 51 51 51 51 51 51 51 51 272 272 272 272 1 1 1 275 275 275 275 275 275 275 275 101 101 101 101 101 101 101 101 101 288 288 288 288 47 47 47 47 47 333 333 333 333 333 333 333 333 164 164 164 164 164 164 164 164 164 164 164 164 164 164 164 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 116 102 102 102 102 102 102 102 102 102 102 127 127 127 233 233 233 233 233 217 217 217 217 217 217 217 265 265 265 265 265 265 265 265 265 265 265 116 116 116 227 227 227 227 227 100 100 100 100 100 100 100 100 287 287 287 287 287 287 287 287 277 277 277 277 193 193 193 272 272 272 272 287 287 287 287 287 287 287 287 188 188 188 119 119 119 119 119 119 204 204 204 204 204 204 204 204 99 99 99 99 99 99 99 99 99 99 99 225 225 225 225 225 225 49 49 49 233 233 233 116 116 116 116 116 331 331 331 189 189 189 292 292 292 292 292 292 292 292 292 1 1 1 67 67 67 67 67 67 67 67 67 67 67 67 67 67 67 224 224 224 224 224 224 227 227 227 227 227 227 227 100 100 100 100 100 100 100 100 100 100 100 227 227 227 227 227 227 227 227 227 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 101 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 331 331 331 331 331 331 53 53 53 53 53 53 53 341 341 341 341 341 49 49 49 233 233 233 288 288 288 288 50 50 50 50 107 107 107 107 107 107 107 107 107 107 107 107 107 107 193 193 193 193 193 193 288 288 288 288 288 288 279 279 279 279 279 279 279 279 279 279 279 279 193 193 193 193 193 193 193 193 220 220 220 220 220 219 219 219 219 219 219 219 219 219 53 53 53 53 229 229 229 229 189 189 189 189 236 236 236 236 236 236 236 236 247 247 247 247 247 247 247 247 247 329 329 329 329 329 329 329 144 144 144 144 144 187 187 187 187 232 232 232 119 119 119 48 48 48 48 48 107 107 107 107 107 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 249 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 231 231 231 231 231 101 101 101 101 101 101 101 101 101 101 121 121 121 121 144 144 144 144 144 331 331 331 331 331 49 49 49 340 340 340 340 340 340 340 227 227 227 227 193 193 193 193 281 281 281 281 281 189 189 189 340 340 340 340 279 279 279 279 279 279 273 273 273 273 273 133 133 133 133 233 233 233 233 233 233 233 233 281 281 281 281 281 281 281 281 281 144 144 144 144 144 144 144 144 144 144 144 144 144 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 71 71 71 71 71 71 71 225 225 225 225 121 121 121 121 121 248 248 248 248 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 215 215 215 215 215 215 215 215 133 133 133 133 233 233 233 233 233 145 145 145 145 145 145 145 145 49 49 49 225 225 225 225 225 225 225 204 204 204 204 204 204 204 204 204 204 204 204 204 204 187 187 187 187 187 187 187 187 187 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 279 279 279 279 279 279 279 133 133 133 133 133 133 116 116 116 116 116 116 283 
283 283 283 283 208 208 208 208 208 208 208 179 179 179 179 179 179 37 37 37 37 37 37 117 117 117 117 49 49 49 49 232 232 232 232 232 287 287 287 287 287 287 287 287 287 101 101 101 101 101 101 101 101 101 228 228 228 228 228 228 228 287 287 287 188 188 188 175 175 175 175 175 175 193 193 193 193 193 193 193 193 288 288 288 288 288 279 279 279 279 279 279 279 279 279 279 279 279 193 193 193 193 193 193 193 193 193 220 220 220 220 220 220 220 220 220 1 1 1 331 331 331 331 331 331 331 331 331 21 21 21 21 21 21 113 113 113 113 113 113 113 189 189 189 189 236 236 236 236 287 287 287 287 188 188 188 188 279 279 279 279 279 279 279 279 279 279 279 279 208 208 208 208 208 208 208 208 119 119 119 49 49 49 288 288 288 288 102 102 102 102 102 102 102 102 115 115 115 115 115 115 193 193 193 117 117 117 49 49 49 232 232 232 232 232 171 171 171 171 171 171 171 171 171 171 21 21 21 21 21 21 21 21 224 224 224 224 224 224 224 224 224 224 247 247 247 247 247 247 247 329 329 329 329 329 329 145 145 145 145 145 109 109 109 109 109 109 109 109 73 73 73 73 73 73 73 277 277 277 277 277 277 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 279 279 279 279 279 279 279 279 133 133 133 133 133 133 133 116 116 116 116 116 283 283 283 283 283 283 283 283 283 283 208 208 208 208 231 231 231 231 231 231 231 231 231 231 133 133 133 133 133 133 329 329 329 329 144 144 144 144 144 144 144 144 279 279 279 279 279 279 279 279 279 279 68 68 68 68 68 68 68 68 68 68 119 119 119 119 119 48 48 48 48 48 107 107 107 107 107 107 107 107 107 107 209 209 209 209 209 209 209 288 288 288 288 47 47 47 47 328 328 328 328 328 227 227 227 227 227 208 208 208 208 208 208 171 171 171 171 171 171 144 144 144 271 271 271 271 271 271 271 271 271 271 271 277 277 277 277 277 85 85 85 85 85 85 225 225 225 225 225 225 189 189 189 189 189 189 236 236 236 236 236 236 47 47 47 47 109 109 109 109 109 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 85 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 53 53 53 53 288 288 288 191 191 191 191 191 172 172 172 172 172 172 172 172 191 191 191 288 288 288 288 219 219 219 219 219 219 219 219 219 37 37 37 37 37 37 37 37 37 273 273 273 273 273 273 288 288 288 179 179 179 144 144 144 144 171 171 171 171 171 171 171 145 145 145 145 228 228 228 228 228 228 228 107 107 107 107 107 209 209 209 209 209 189 189 189 189 189 189 189 236 236 236 236 236 236 236 236 236 236 236 236 279 279 279 279 279 279 279 279 279 279 279 279 279 209 209 209 209 209 209 209 209 209 281 281 281 281 281 281 281 281 281 281 281 281 197 197 197 197 197 197 197 197 220 220 220 220 220 220 187 187 187 289 289 289 289 280 280 280 280 280 50 50 50 50 227 227 227 227 227 227 227 227 149 149 149 149 149 149 149 149 149 149 149 281 281 281 281 281 281 281 281 281 281 281 204 204 204 204 204 204 204 102 102 102 102 102 102 102 102 102 115 115 115 115 115 115 115 193 193 193 193 193 116 116 116 116 271 271 271 271 271 271 271 271 271 277 277 277 277 85 85 85 85 85 85 85 85 224 224 224 224 224 224 224 224 187 187 187 187 187 341 341 341 341 341 341 49 49 49 49 233 233 233 233 288 288 288 288 187 187 187 187 187 187 187 187 187 187 187 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 102 102 
102 102 102 331 331 331 331 331 331 331 331 21 21 21 233 233 233 289 289 289 49 49 49 116 116 116 287 287 287 188 188 188 188 279 279 279 279 279 279 279 279 279 279 279 279 208 208 208 208 208 208 208 208 208 131 131 131 131 131 329 329 329 329 329 329 329 277 277 277 205 205 205 293 293 293 293 293 197 197 197 236 236 236 119 119 119 119 49 49 49 288 288 288 331 331 331 49 49 49 340 340 340 340 340 340 287 287 287 287 287 188 188 188 107 107 107 107 107 107 208 208 208 208 208 208 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 209 209 209 209 209 209 209 209 209 232 232 232 232 232 19 19 19 19 19 19 19 19 232 232 232 119 119 119 119 119 37 37 37 37 37 37 37 37 37 288 288 288 288 107 107 107 107 107 249 249 249 249 249 249 249 249 249 249 249 249 249 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 189 189 189 189 189 221 221 221 221 221 49 49 49 49 49 340 340 340 340 340 102 102 102 102 102 102 102 102 102 115 115 115 115 115 115 193 193 193 193 193 117 117 117 49 49 49 49 49 49 49 232 232 232 231 231 231 231 231 231 231 231 231 231 248 248 248 248 248 248 248 248 248 248 331 331 331 331 331 133 133 133 133 121 121 121 121 121 121 121 144 144 144 144 144 144 99 99 99 99 99 99 99 99 99 99 116 116 116 116 116 116 131 131 131 131 329 329 329 329 329 329 144 144 144 144 144 179 179 179 179 179 179 179 179 179 37 37 37 328 328 328 328 328 47 47 47 47 233 233 233 233 233 233 53 53 53 53 53 53 53 121 121 121 121 121 144 144 144 144 144 144 144 144 144 23 23 23 23 23 273 273 273 273 273 145 145 145 145 145 289 289 289 289 289 289 289 289 289 289 289 289 289 321 321 321 321 321 321 321 233 233 233 233 49 49 49 49 49 289 289 289 289 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 250 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 133 133 133 133 133 133 133 133 133 276 276 276 276 276 146 146 146 146 146 146 146 146 146 146 146 50 50 50 50 50 50 50 50 50 50 50 223 223 223 223 223 223 223 223 223 223 223 223 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 288 288 288 288 288 288 227 227 227 227 227 227 227 227 69 69 69 69 69 69 69 276 276 276 276 276 276 276 276 276 1 1 1 1 1 111 111 111 111 111 111 111 111 111 111 111 111 111 111 111 111 111 111 133 133 133 133 133 277 277 277 277 277 204 204 204 204 204 204 204 204 204 204 287 287 287 287 287 287 287 287 287 277 277 277 277 209 209 209 209 209 209 340 340 340 340 340 340 340 340 340 340 340 340 67 67 67 67 67 67 67 67 67 67 67 67 67 224 224 224 224 224 224 224 224 187 187 187 187 187 232 232 232 232 232 232 232 232 232 107 107 107 107 107 107 107 225 225 225 225 225 225 225 225 225 225 225 225 225 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 321 228 228 228 228 228 228 228 228 228 228 228 228 228 228 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 193 193 193 193 193 193 193 193 193 193 193 280 280 280 280 280 280 280 280 280 1 1 1 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 
99 99 99 225 225 225 225 225 225 225 49 49 49 49 49 49 49 233 233 233 116 116 116 116 116 187 187 187 340 340 340 340 340 340 340 340 119 119 119 119 119 52 52 52 52 52 52 52 52 52 1 1 1 1 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 271 271 271 271 271 271 271 271 271 271 271 271 225 225 225 225 165 165 165 165 165 165 165 165 165 165 165 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 102 102 102 102 102 102 102 102 102 215 215 215 215 215 215 215 189 189 189 189 189 281 281 281 281 288 288 288 288 288 288 288 288 223 223 223 223 223 223 53 53 53 53 53 328 328 328 328 328 191 191 191 288 288 288 288 288 288 63 63 63 63 63 63 63 63 63 225 225 225 225 225 225 225 225 277 277 277 277 277 277 277 133 133 133 133 133 133 133 117 117 117 117 117 204 204 204 204 204 204 204 204 204 204 204 35 35 35 233 233 233 116 116 116 99 99 99 99 99 99 228 228 228 228 228 228 228 279 279 279 279 279 279 279 279 279 279 279 279 279 279 279 248 248 248 248 248 248 248 248 248 248 248 175 175 175 175 175 175 175 175 175 175 175 225 225 225 225 225 225 225 225 225 225 225 37 37 37 37 37 116 116 116 116 99 99 99 99 99 99 228 228 228 228 228 228 228 228 228 175 175 175 175 175 175 249 249 249 249 249 189 189 189 189 189 189 236 236 236 287 287 287 287 287 48 48 48 48 48 48 223 223 223 223 223 223 223 193 193 193 193 193 328 328 328 328 328 179 179 179 179 179 179 179 209 209 209 209 209 209 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 99 99 99 99 99 99 99 99 99 99 99 328 328 328 328 328 328 328 328 67 67 67 225 225 225 225 333 333 333 333 333 333 333 205 205 205 205 340 340 340 340 340 340 340 179 179 179 179 179 179 179 179 179 149 149 149 149 149 149 116 116 116 116 119 119 119 49 49 49 49 49 288 288 288 288 288 271 271 271 271 271 271 277 277 277 277 193 193 193 233 233 233 233 233 233 233 233 280 280 280 280 280 280 280 131 131 131 131 131 117 117 117 117 117 117 333 333 333 333 145 145 145 145 116 116 116 116 116 116 116 116 116 116 99 99 99 99 99 99 99 99 99 99 99 225 225 225 225 225 49 49 49 49 49 233 233 233 116 116 116 331 331 331 331 331 49 49 49 340 340 340 340 340 340 119 119 119 52 52 52 52 271 271 271 271 271 271 271 271 271 271 271 271 277 277 277 277 193 193 193 289 289 289 205 205 205 205 205 205 205 49 49 49 49 49 49 281 281 281 281 281 281 288 288 288 271 271 271 271 271 271 225 225 225 225 225 165 165 165 165 165 165 165 165 280 280 280 280 280 280 280 280 280 280 280 187 187 187 232 232 232 119 119 119 52 52 52 52 52 331 331 331 331 331 331 331 331 331 331 149 149 149 149 149 225 225 225 225 225 225 225 225 225 225 225 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 47 47 47 47 47 47 47 233 233 233 116 116 116 102 102 102 102 102 102 335 335 335 335 335 335 335 335 335 335 335 321 321 321 321 341 341 341 341 341 341 341 116 116 116 287 287 287 48 48 48 48 48 187 187 187 229 229 229 229 229 229 229 229 37 37 37 37 37 37 37 37 217 217 217 217 217 217 217 217 217 49 49 49 232 232 232 232 232 102 102 102 102 102 331 331 331 331 331 331 49 49 49 340 340 340 340 340 340 340 340 340 223 223 223 223 223 223 223 193 193 193 193 193 329 329 329 329 189 189 189 189 189 236 236 236 236 179 179 179 179 179 179 179 179 179 179 179 209 209 209 209 276 276 276 276 276 276 276 276 276 276 276 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 53 53 53 288 288 288 288 102 102 102 102 102 102 102 102 102 102 231 231 231 231 231 231 231 231 133 133 133 133 133 329 329 329 329 329 329 329 144 144 144 144 144 144 144 144 275 275 275 275 275 275 275 275 275 275 275 209 209 209 209 209 209 225 225 225 225 225 225 204 204 204 204 204 204 187 187 187 187 221 221 221 221 221 221 281 281 281 281 281 281 281 273 273 273 273 273 273 133 133 133 133 221 221 221 221 221 289 289 289 289 289 289 49 49 49 116 116 116 116 102 102 102 102 102 102 102 102 102 102 102 102 331 331 331 331 331 331 331 331 331 331 331 331 305 305 305 305 305 305 305 305 305 305 305 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 187 289 289 289 289 289 289 280 280 280 280 280 280 280 115 115 115 115 115 189 189 189 189 189 189 225 225 225 225 225 225 225 225 101 101 101 101 101 101 101 101 101 101 101 289 289 289 289 289 289 173 173 173 173 173 49 49 49 49 224 224 224 224 224 224 331 331 331 331 331 193 193 193 193 232 232 232 232 335 335 335 335 305 305 305 276 276 276 276 276 276 276 276 187 187 187 187 187 229 229 229 229 229 229 229 41 41 41 41 41 41 41 41 217 217 217 217 217 217 217 217 217 49 49 49 233 233 233 233 233 165 165 165 165 165 165 165 285 285 285 285 285 285 285 285 285 285 285 49 49 49 233 233 233 340 340 340 340 340 340 219 219 219 219 219 219 219 219 219 219 53 53 53 53 228 228 228 228 228 228 228 287 287 287 287 287 287 287 287 287 277 277 277 277 320 320 320 320 320 320 320 320 320 320 191 191 191 191 191 341 341 341 341 49 49 49 49 233 233 233 233 233 288 288 288 288 187 187 187 187 187 187 187 187 187 187 187 187 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 53 53 53 53 288 288 288 288 288 119 119 119 119 249 249 249 249 249 249 249 249 249 340 340 340 340 340 340 340 340 340 340 340 340 340 340 275 275 275 275 275 133 133 133 133 133 133 133 116 116 116 116 116 116 116 116 275 275 275 275 275 275 275 275 249 249 249 249 249 249 249 117 117 117 117 117 340 340 340 340 340 146 146 146 146 146 146 279 279 279 279 279 279 279 279 279 279 279 279 248 248 248 248 248 248 248 248 248 171 171 171 171 171 171 171 171 171 171 171 171 171 171 171 53 53 53 53 53 53 53 233 233 233 233 233 233 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 133 133 133 232 232 232 331 331 331 208 208 208 208 208 208 208 175 175 175 175 175 175 175 21 21 21 21 21 21 288 288 288 187 187 187 187 187 233 233 233 233 233 233 233 289 289 289 289 289 289 48 48 48 48 119 119 119 119 119 52 52 52 52 52 52 52 52 287 287 287 287 287 287 287 287 287 277 277 277 277 277 165 165 165 165 165 165 165 165 232 232 232 232 232 232 232 35 35 35 288 288 288 283 283 283 283 283 283 283 283 283 283 21 21 21 21 277 277 277 277 277 277 277 225 225 225 225 49 49 49 49 49 49 289 289 289 289 289 289 289 289 289 289 289 289 289 289 89 89 89 89 89 89 89 89 89 89 89 89 232 232 232 232 47 47 47 233 233 233 116 116 116 119 119 119 52 52 52 52 52 52 52 275 275 275 275 275 275 275 133 133 133 133 133 133 116 116 116 116 116 116 116 116 116 116 116 275 275 275 275 275 275 249 249 249 249 249 249 249 249 249 117 117 117 117 117 340 340 340 340 340 107 107 107 107 205 205 205 205 177 177 
177 177 177 177 177 37 37 37 37 37 37 37 37 37 232 232 232 232 287 287 287 48 48 48 48 48 171 171 171 171 171 171 171 171 171 225 225 225 225 37 37 37 37 37 37 37 37 37 37 37 37 284 284 284 284 284 284 284 284 284 284 271 271 271 271 271 271 271 271 271 271 271 271 37 37 37 37 37 37 37 37 37 37 37 37 37 37 281 281 281 281 281 281 281 281 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 281 281 281 281 281 281 281 288 288 288 288 227 227 227 227 193 193 193 193 193 193 281 281 281 281 281 281 189 189 189 189 340 340 340 340 279 279 279 279 279 273 273 273 273 273 273 133 133 133 133 233 233 233 233 233 233 233 281 281 281 281 281 281 281 281 144 144 144 144 331 331 331 331 331 331 53 53 53 53 288 288 288 288 288 227 227 227 227 227 165 165 165 165 165 165 165 116 116 116 116 119 119 119 49 49 49 49 228 228 228 228 228 228 228 228 228 275 275 275 275 133 133 133 133 133 133 133 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 233 233 233 116 116 116 116 283 283 283 283 283 283 283 283 283 208 208 208 208 208 208 208 208 279 279 279 279 279 279 279 279 279 279 133 133 133 133 116 116 116 116 116 116 283 283 283 283 283 283 283 208 208 208 208 208 115 115 115 115 115 115 193 193 193 193 193 117 117 117 117 49 49 49 232 232 232 231 231 231 231 231 231 231 248 248 248 248 248 248 47 47 47 47 47 233 233 233 116 116 116 171 171 171 171 144 144 144 144 144 144 271 271 271 271 271 271 271 271 271 271 193 193 193 289 289 289 289 289 205 205 205 205 205 340 340 340 340 340 340 279 279 279 279 279 279 279 279 279 165 165 165 165 165 165 165 220 220 220 220 220 220 231 231 231 231 231 21 21 21 21 21 21 21 21 288 288 288 287 287 287 48 48 48 48 35 35 35 35 35 35 35 35 35 35 35 35 35 281 281 281 281 281 281 281 220 220 220 220 179 179 179 144 144 144 144 131 131 131 131 233 233 233 233 204 204 204 204 227 227 227 227 227 227 227 227 69 69 69 69 276 276 276 276 276 276 219 219 219 219 219 219 219 219 219 333 333 333 133 133 133 133 133 281 281 281 281 281 281 113 113 113 113 113 113 113 113 113 49 49 49 49 49 233 233 233 233 233 233 233 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 283 283 283 283 283 283 283 283 283 283 283 208 208 208 208 208 279 279 279 279 279 279 279 133 133 133 133 116 116 116 102 102 102 102 102 102 102 102 102 227 227 227 227 227 227 227 53 53 53 53 53 53 281 281 281 288 288 288 179 179 179 37 37 37 328 328 328 328 35 35 35 35 35 35 35 35 35 35 35 281 281 281 281 281 288 288 288 288 288 179 179 179 144 144 144 50 50 50 50 50 50 291 291 291 291 291 291 291 291 291 291 291 85 85 85 85 85 85 85 85 85 85 85 85 341 341 341 341 341 341 341 341 49 49 49 233 233 233 116 116 116 116 63 63 63 63 63 63 63 63 225 225 225 225 225 225 225 225 277 277 277 277 277 133 133 133 133 133 133 117 117 117 117 117 117 204 204 204 204 204 204 204 204 204 204 204 204 204 204 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 279 279 279 279 279 279 279 279 49 49 49 49 273 273 273 273 273 273 273 249 249 249 249 249 249 249 249 249 340 340 340 340 340 340 340 340 102 102 102 102 102 102 102 102 102 102 102 102 102 179 179 179 179 179 179 179 179 179 179 179 37 37 37 37 37 37 37 37 37 37 116 116 116 116 116 116 116 287 287 287 287 287 287 287 287 287 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 107 107 
107 107 107 53 53 53 53 53 53 53 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 179 179 179 179 179 179 179 179 179 179 179 179 84 84 84 84 84 84 84 84 84 84 335 335 335 335 335 335 335 335 335 335 320 320 320 175 175 175 175 175 175 175 249 249 249 189 189 189 189 189 189 236 236 236 287 287 287 287 188 188 188 171 171 171 171 171 171 171 171 101 101 101 101 101 101 101 101 101 101 101 101 233 233 233 233 116 116 116 116 116 83 83 83 83 83 83 83 288 288 288 288 47 47 47 47 109 109 109 109 109 109 85 85 85 85 85 85 85 85 85 85 85 288 288 288 288 288 291 291 291 291 291 193 193 193 237 237 237 237 237 237 237 340 340 340 340 340 340 340 191 191 191 172 172 172 335 335 335 335 320 320 320 115 115 115 115 115 115 115 249 249 249 249 249 249 249 233 233 233 288 288 288 288 35 35 35 35 35 35 35 35 281 281 281 281 281 281 281 281 220 220 220 219 219 219 333 333 333 333 133 133 133 133 133 281 281 281 281 281 281 281 281 113 113 113 113 113 113 113 113 49 49 49 49 49 49 233 233 233 233 233 233 233 233 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 233 233 233 116 116 116 331 331 331 53 53 53 53 53 53 53 53 288 288 288 288 288 288 288 288 288 288 115 115 115 115 115 115 115 115 53 53 53 53 53 53 53 53 53 53 340 340 340 340 340 340 340 340 340 340 340 340 227 227 227 227 227 165 165 165 165 165 165 220 220 220 220 220 220 220 220 119 119 119 119 48 48 48 48 48 48 275 275 275 275 275 275 275 275 275 249 249 249 249 249 249 249 249 117 117 117 117 117 117 340 340 340 340 340 340 340 340 340 275 275 275 275 275 275 133 133 133 133 133 133 133 133 116 116 116 116 116 116 116 116 116 116 116 116 116 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 331 133 133 133 133 133 133 133 133 133 224 224 224 224 224 224 224 231 231 231 231 231 231 231 231 231 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 84 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 102 115 115 115 115 115 49 49 49 49 49 233 233 233 233 233 233 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 248 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 279 279 279 279 279 279 279 279 279 279 279 279 279 279 133 133 133 133 116 116 116 116 116 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 293 337 337 337 337 337 337 337 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 119 119 119 119 119 119 119 133 133 133 277 277 277 116 116 116 107 107 107 107 208 208 208 208 208 208 231 231 231 231 231 231 231 231 248 248 248 248 248 248 248 279 279 279 279 279 279 279 279 279 221 221 221 221 221 221 221 249 249 249 249 249 249 272 272 272 171 171 171 171 171 171 171 171 144 144 144 144 187 187 187 187 187 187 187 187 187 187 229 229 229 229 229 229 41 41 41 41 41 41 41 41 217 217 217 217 217 217 217 217 49 49 49 233 233 233 233 233 165 
165 165 165 165 165 165 165 285 285 285 285 285 285 285 285 285 285 285 285 285 49 49 49 49 49 232 232 232 232 232 232 119 119 119 119 133 133 133 133 133 133 133 133 133 232 232 232 232 232 232 232 232 232 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 305 305 305 305 116 116 116 116 116 116 116 116 116 119 119 119 133 133 133 133 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 276 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 53 53 53 288 288 288 288 35 35 35 35 35 35 228 228 228 228 228 102 102 102 102 102 102 102 102 1 1 1 287 287 287 287 287 287 287 287 69 69 69 69 69 69 69 221 221 221 221 221 221 189 189 189 189 236 236 236 236 287 287 287 287 287 287 287 287 320 320 320 320 320 227 227 227 227 227 227 53 53 53 53 53 53 53 53 53 53 53 112 112 112 112 112 112 112 112 112 112 112 112 112 112 112 112 112 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 271 271 271 271 271 271 271 209 209 209 209 209 209 209 273 273 273 49 49 49 224 224 224 224 224 224 224 19 19 19 19 19 19 19 276 276 276 276 276 276 276 67 67 67 67 67 225 225 225 333 333 333 333 333 205 205 205 205 340 340 340 340 340 287 287 287 287 287 287 287 287 133 133 133 225 225 225 225 225 189 189 189 189 189 189 236 236 236 236 227 227 227 227 227 227 208 208 208 208 208 208 208 208 208 102 102 102 102 102 102 102 102 102 102 102 115 115 115 115 115 115 115 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 320 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 305 305 305 116 116 116 116 335 335 335 335 335 320 320 320 320 320 275 275 275 275 275 275 37 37 37 37 37 37 37 37 37 37 121 121 121 121 121 144 144 144 144 144 144 144 102 102 102 102 102 102 102 102 115 115 115 115 115 115 115 193 193 193 193 193 117 117 117 49 49 49 232 232 232 232 287 287 287 287 287 287 287 287 287 287 69 69 69 69 69 69 69 69 69 69 69 69 69 69 69 220 220 220 220 220 220 220 220 220 220 220 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 187 187 187 187 187 187 187 187 187 187 172 172 172 172 172 172 172 172 335 335 335 335 320 320 320 320 279 279 279 279 279 279 279 279 279 279 279 279 164 164 164 164 164 164 164 164 164 279 279 279 279 279 279 279 279 279 279 279 248 248 248 248 248 248 248 99 99 99 99 99 224 224 224 224 224 224 224 279 279 279 279 279 279 279 279 279 289 289 289 289 289 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 272 272 272 272 272 272 272 272 272 272 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 102 102 102 102 102 102 102 102 102 102 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 219 37 37 37 37 37 37 37 37 37 232 232 232 232 232 232 232 232 232 279 279 279 279 279 279 279 289 289 289 289 289 21 21 21 21 21 21 21 21 21 21 272 272 272 272 272 331 331 331 133 133 133 232 232 232 232 102 102 102 102 102 102 102 227 227 227 227 227 227 227 227 227 165 165 165 165 165 165 220 220 220 220 220 220 220 220 51 51 51 51 51 51 272 272 272 272 227 227 227 227 227 100 100 100 100 100 100 100 100 227 227 227 227 227 227 227 227 227 227 101 101 101 101 101 101 101 101 101 101 233 233 233 116 116 116 287 287 287 287 287 287 287 287 320 320 320 320 320 320 320 187 187 187 187 187 187 288 288 288 288 288 288 71 71 71 71 71 71 225 225 225 225 121 121 121 121 248 248 248 248 248 248 248 248 248 248 248 187 187 187 187 187 187 289 289 289 289 289 280 280 280 280 280 
280 280 115 115 115 115 115 115 193 193 193 193 193 173 173 173 173 173 173 173 49 49 49 49 221 221 221 221 221 49 49 49 49 49 225 225 225 225 225 225 225 225 225 225 225 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 227 227 227 227 227 227 227 227 227 227 227 227 227 37 37 37 37 37 37 37 37 37 37 37 37 37 293 293 293 293 293 293 293 293 293 293 293 337 337 337 337 337 337 337 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 316 1 1 1 1 1 1 1 1 1 1 1 1 1 +1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 49 49 49 340 340 340 340 340 340 340 187 187 187 233 233 233 233 233 233 217 217 217 217 217 217 217 265 265 265 265 265 265 265 189 189 189 189 189 189 189 189 236 236 236 236 236 236 179 179 179 179 189 189 189 229 229 229 229 229 229 281 281 281 281 281 281 281 281 133 133 133 133 225 225 225 225 225 225 225 225 225 225 225 225 225 172 172 172 172 172 172 172 172 172 172 172 172 172 172 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 223 223 223 223 223 223 223 223 101 101 101 101 101 101 101 101 101 101 220 220 220 220 220 220 227 227 227 227 227 227 227 227 227 249 249 249 249 249 249 249 249 249 249 281 281 281 281 281 281 281 288 288 288 288 288 288 288 219 219 219 219 219 219 219 219 333 333 333 333 333 333 101 101 101 101 101 101 101 101 101 101 49 49 49 49 288 288 288 288 288 171 171 171 171 171 171 171 171 171 249 249 249 249 249 249 249 249 249 249 249 249 221 221 221 221 221 221 221 221 280 280 280 280 280 280 280 179 179 179 179 179 179 208 208 208 208 208 208 208 208 223 223 223 223 223 223 223 101 101 101 101 101 101 101 101 221 221 221 221 221 221 288 288 288 287 287 287 287 287 287 287 69 69 69 69 69 69 69 69 69 69 69 221 221 221 221 221 49 49 49 289 289 289 289 189 189 189 189 328 328 328 328 271 271 271 271 271 271 271 271 271 271 209 209 209 209 209 209 209 209 209 273 273 273 273 273 273 49 49 49 49 49 224 224 224 224 224 224 224 331 331 331 331 331 133 133 133 133 133 232 232 232 232 232 232 119 119 119 164 164 164 164 164 164 331 331 331 331 331 331 331 331 331 144 144 144 144 144 144 331 331 331 331 331 331 331 193 193 193 193 225 225 225 225 225 225 189 189 189 189 189 189 236 236 236 236 236 236 236 287 287 287 287 287 287 188 188 188 115 115 115 115 115 115 115 320 320 320 320 320 320 320 320 119 119 119 119 48 48 48 48 287 287 287 287 287 287 287 287 69 69 69 69 69 69 69 69 69 69 69 221 221 221 221 221 221 221 221 189 189 189 236 236 236 236 236 236 119 119 119 119 49 49 49 49 229 229 229 229 229 281 281 281 281 281 281 281 281 281 281 281 281 133 133 133 133 133 133 225 225 225 225 225 225 225 225 225 225 329 329 329 329 329 340 340 340 340 340 340 340 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 35 35 35 35 35 35 233 233 233 116 116 116 115 115 115 115 193 193 193 116 116 116 231 231 231 231 231 231 21 21 21 21 21 21 288 288 288 288 288 187 187 187 221 221 221 221 221 221 281 281 281 281 281 281 273 273 273 273 273 133 133 133 133 133 133 221 221 221 221 221 221 288 288 288 288 288 288 187 187 187 228 228 228 228 228 228 287 287 287 287 287 188 188 188 219 219 219 219 219 219 219 219 219 219 219 219 209 209 209 209 209 209 209 272 272 272 272 272 272 272 51 51 51 51 51 51 51 51 51 51 272 272 272 272 179 179 179 189 189 189 189 189 340 340 340 340 340 340 340 340 340 131 131 131 131 131 233 233 233 233 
233 233 116 116 116 116 116 47 47 47 328 328 328 328 328 328 328 191 191 191 191 288 288 288 288 288 288 288 288 288 288 288 288 288 288 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 53 53 53 53 288 288 288 288 179 179 179 179 179 179 208 208 208 179 179 179 37 37 37 116 116 116 231 231 231 231 231 231 231 231 231 133 133 133 133 133 133 329 329 329 329 329 329 329 329 144 144 144 144 144 187 187 187 187 187 221 221 221 221 221 221 221 281 281 281 281 281 281 281 281 281 273 273 273 273 273 133 133 133 133 133 133 221 221 221 221 221 289 289 289 289 289 289 189 189 189 116 116 116 287 287 287 287 287 320 320 320 320 320 320 320 320 187 187 187 233 233 233 233 233 233 217 217 217 217 217 217 264 264 264 264 264 264 264 264 264 264 264 119 119 119 119 119 52 52 52 52 52 279 279 279 279 279 279 279 49 49 49 49 281 281 281 281 281 281 281 281 281 281 281 281 281 101 101 101 101 101 101 101 101 101 101 101 101 101 49 49 49 289 289 289 289 289 289 289 289 289 204 204 204 204 47 47 47 47 328 328 328 328 328 328 328 50 50 50 50 50 223 223 223 223 193 193 193 289 289 289 289 289 49 49 49 49 224 224 224 224 175 175 175 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 149 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 224 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 331 331 331 331 331 331 331 331 331 331 331 193 193 193 193 229 229 229 229 229 229 229 49 49 49 49 49 232 232 232 232 232 232 232 232 232 331 331 331 331 144 144 144 144 144 107 107 107 107 107 107 107 37 37 37 37 37 37 37 37 37 37 116 116 116 116 116 187 187 187 187 233 233 233 233 233 233 233 53 53 53 53 53 53 53 53 53 172 172 172 172 172 172 172 187 187 187 232 232 232 232 232 232 67 67 67 67 67 67 67 67 224 224 224 224 224 224 219 219 219 219 219 219 219 219 219 21 21 21 21 21 21 21 21 233 233 233 233 233 233 285 285 285 285 285 285 285 285 49 49 49 49 49 233 233 233 233 233 233 233 280 280 280 280 280 280 280 280 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 107 107 107 107 107 107 53 53 53 53 53 53 288 288 288 288 288 223 223 223 223 223 223 193 193 193 289 289 289 49 49 49 49 49 224 224 224 224 224 224 224 175 175 175 175 175 175 175 149 149 149 149 149 149 149 149 149 149 149 149 149 149 225 225 225 225 225 225 225 225 225 225 225 225 225 225 340 340 340 340 340 340 340 340 340 340 340 340 340 340 340 331 331 331 331 331 331 144 144 144 144 144 144 144 331 331 331 331 331 331 331 331 331 331 331 331 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 149 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 280 1 1 1 1 1 diff --git a/SpeechT5/SpeechLM/dataset/LibriSpeech/phone_unit/train_sample100.tsv b/SpeechT5/SpeechLM/dataset/LibriSpeech/phone_unit/train_sample100.tsv new file mode 100644 index 0000000000000000000000000000000000000000..77da1d382251d74e02d73cce8e2e35ec6e1f8f0c --- /dev/null +++ b/SpeechT5/SpeechLM/dataset/LibriSpeech/phone_unit/train_sample100.tsv @@ -0,0 +1,101 @@ +/LocalData/dataset/LibriSpeech +train-clean-100/103/1240/103-1240-0000.flac 225360 +train-clean-100/103/1240/103-1240-0001.flac 255120 +train-clean-100/103/1240/103-1240-0002.flac 223120 +train-clean-100/103/1240/103-1240-0003.flac 235360 +train-clean-100/103/1240/103-1240-0004.flac 200240 +train-clean-100/103/1240/103-1240-0005.flac 242800 +train-clean-100/103/1240/103-1240-0006.flac 
153280 +train-clean-100/103/1240/103-1240-0007.flac 240560 +train-clean-100/103/1240/103-1240-0008.flac 246960 +train-clean-100/103/1240/103-1240-0009.flac 160480 +train-clean-100/103/1240/103-1240-0010.flac 236880 +train-clean-100/103/1240/103-1240-0011.flac 234480 +train-clean-100/103/1240/103-1240-0012.flac 243040 +train-clean-100/103/1240/103-1240-0013.flac 244160 +train-clean-100/103/1240/103-1240-0014.flac 223360 +train-clean-100/103/1240/103-1240-0015.flac 60960 +train-clean-100/103/1240/103-1240-0016.flac 250640 +train-clean-100/103/1240/103-1240-0017.flac 229040 +train-clean-100/103/1240/103-1240-0018.flac 185760 +train-clean-100/103/1240/103-1240-0019.flac 246480 +train-clean-100/103/1240/103-1240-0020.flac 214640 +train-clean-100/103/1240/103-1240-0021.flac 236960 +train-clean-100/103/1240/103-1240-0022.flac 262000 +train-clean-100/103/1240/103-1240-0023.flac 194400 +train-clean-100/103/1240/103-1240-0024.flac 244320 +train-clean-100/103/1240/103-1240-0025.flac 241920 +train-clean-100/103/1240/103-1240-0026.flac 133360 +train-clean-100/103/1240/103-1240-0027.flac 223440 +train-clean-100/103/1240/103-1240-0028.flac 250400 +train-clean-100/103/1240/103-1240-0029.flac 244320 +train-clean-100/103/1240/103-1240-0030.flac 232320 +train-clean-100/103/1240/103-1240-0031.flac 269760 +train-clean-100/103/1240/103-1240-0032.flac 236400 +train-clean-100/103/1240/103-1240-0033.flac 230640 +train-clean-100/103/1240/103-1240-0034.flac 246480 +train-clean-100/103/1240/103-1240-0035.flac 256720 +train-clean-100/103/1240/103-1240-0036.flac 200320 +train-clean-100/103/1240/103-1240-0037.flac 237040 +train-clean-100/103/1240/103-1240-0038.flac 114480 +train-clean-100/103/1240/103-1240-0039.flac 230800 +train-clean-100/103/1240/103-1240-0040.flac 234720 +train-clean-100/103/1240/103-1240-0041.flac 216160 +train-clean-100/103/1240/103-1240-0042.flac 249680 +train-clean-100/103/1240/103-1240-0043.flac 236160 +train-clean-100/103/1240/103-1240-0044.flac 262240 +train-clean-100/103/1240/103-1240-0045.flac 250800 +train-clean-100/103/1240/103-1240-0046.flac 222800 +train-clean-100/103/1240/103-1240-0047.flac 206320 +train-clean-100/103/1240/103-1240-0048.flac 236320 +train-clean-100/103/1240/103-1240-0049.flac 244560 +train-clean-100/103/1240/103-1240-0050.flac 224400 +train-clean-100/103/1240/103-1240-0051.flac 245760 +train-clean-100/103/1240/103-1240-0052.flac 236640 +train-clean-100/103/1240/103-1240-0053.flac 218640 +train-clean-100/103/1240/103-1240-0054.flac 261360 +train-clean-100/103/1240/103-1240-0055.flac 179920 +train-clean-100/103/1240/103-1240-0056.flac 229040 +train-clean-100/103/1240/103-1240-0057.flac 109680 +train-clean-100/103/1241/103-1241-0000.flac 255440 +train-clean-100/103/1241/103-1241-0001.flac 248800 +train-clean-100/103/1241/103-1241-0002.flac 249040 +train-clean-100/103/1241/103-1241-0003.flac 222160 +train-clean-100/103/1241/103-1241-0004.flac 236080 +train-clean-100/103/1241/103-1241-0005.flac 224400 +train-clean-100/103/1241/103-1241-0006.flac 243760 +train-clean-100/103/1241/103-1241-0007.flac 242320 +train-clean-100/103/1241/103-1241-0008.flac 242160 +train-clean-100/103/1241/103-1241-0009.flac 222400 +train-clean-100/103/1241/103-1241-0010.flac 253920 +train-clean-100/103/1241/103-1241-0011.flac 231760 +train-clean-100/103/1241/103-1241-0012.flac 239680 +train-clean-100/103/1241/103-1241-0013.flac 236960 +train-clean-100/103/1241/103-1241-0014.flac 242080 +train-clean-100/103/1241/103-1241-0015.flac 224160 +train-clean-100/103/1241/103-1241-0016.flac 234640 
+train-clean-100/103/1241/103-1241-0017.flac 254240 +train-clean-100/103/1241/103-1241-0018.flac 150960 +train-clean-100/103/1241/103-1241-0019.flac 48400 +train-clean-100/103/1241/103-1241-0020.flac 155360 +train-clean-100/103/1241/103-1241-0021.flac 242880 +train-clean-100/103/1241/103-1241-0022.flac 261600 +train-clean-100/103/1241/103-1241-0023.flac 266720 +train-clean-100/103/1241/103-1241-0024.flac 254240 +train-clean-100/103/1241/103-1241-0025.flac 77280 +train-clean-100/103/1241/103-1241-0026.flac 176080 +train-clean-100/103/1241/103-1241-0027.flac 238080 +train-clean-100/103/1241/103-1241-0028.flac 248880 +train-clean-100/103/1241/103-1241-0029.flac 244960 +train-clean-100/103/1241/103-1241-0030.flac 247520 +train-clean-100/103/1241/103-1241-0031.flac 209600 +train-clean-100/103/1241/103-1241-0032.flac 224080 +train-clean-100/103/1241/103-1241-0033.flac 251920 +train-clean-100/103/1241/103-1241-0034.flac 270560 +train-clean-100/103/1241/103-1241-0035.flac 248800 +train-clean-100/103/1241/103-1241-0036.flac 249040 +train-clean-100/103/1241/103-1241-0037.flac 204400 +train-clean-100/103/1241/103-1241-0038.flac 238960 +train-clean-100/103/1241/103-1241-0039.flac 258160 +train-clean-100/103/1241/103-1241-0040.flac 220560 +train-clean-100/103/1241/103-1241-0041.flac 252240 diff --git a/SpeechT5/SpeechLM/modules.py b/SpeechT5/SpeechLM/modules.py new file mode 100644 index 0000000000000000000000000000000000000000..2841868b315cee2e3f8d7c072488d840bbaa8ab7 --- /dev/null +++ b/SpeechT5/SpeechLM/modules.py @@ -0,0 +1,2130 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- +""" +We just merge all the required modules and functions into one python file. +It is for easily use the pre-trained model to extract features. +""" +import math +import numpy as np +import logging +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import Parameter +from torch import Tensor +from typing import Any, Dict, List, Tuple, Callable, Optional + +logger = logging.getLogger(__name__) + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerEncoderBase": + return "TransformerEncoder" + else: + return module_name + +def utils_make_positions(tensor, padding_idx: int, onnx_trace: bool = False): + """Replace non-padding symbols with their position numbers. + + Position numbers begin at padding_idx+1. Padding symbols are ignored. + """ + # The series of casts and type-conversions here are carefully + # balanced to both work with ONNX export and XLA. In particular XLA + # prefers ints, cumsum defaults to output longs, and ONNX doesn't know + # how to handle the dtype kwarg in cumsum. + mask = tensor.ne(padding_idx).int() + return (torch.cumsum(mask, dim=1).type_as(mask) * mask).long() + padding_idx + +def utils_item(tensor): + # tpu-comment: making this a no-op for xla devices. 
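[Editor's aside on the manifest format added above: train_sample100.tsv stores the dataset root on its first line, then one relative audio path per line followed by what appears to be the utterance length in samples. A minimal parsing sketch, not part of the diff; the helper name is hypothetical and the 16 kHz rate is simply the LibriSpeech default.]

```python
from pathlib import Path

def read_manifest(tsv_path):
    """Parse a manifest like train_sample100.tsv:
    first line = dataset root, remaining lines = "<relative_path> <num_samples>"."""
    lines = Path(tsv_path).read_text().strip().splitlines()
    root = Path(lines[0])
    entries = []
    for line in lines[1:]:
        rel_path, n_samples = line.split()
        entries.append((root / rel_path, int(n_samples)))
    return entries

# Example (assuming a local copy of the manifest):
# for path, n_samples in read_manifest("train_sample100.tsv"):
#     print(path, n_samples / 16000, "seconds")  # LibriSpeech audio is 16 kHz
```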
+ if torch.is_tensor(tensor) and tensor.device.type == "xla": + return tensor.detach() + if hasattr(tensor, "item"): + return tensor.item() + if hasattr(tensor, "__getitem__"): + return tensor[0] + return tensor + +def fsdp_wrap(module, min_num_params: Optional[int] = None, **kwargs): + """ + Helper to wrap layers/modules in FSDP. This falls back to a no-op if + fairscale is not available. + + Args: + module (nn.Module): module to (maybe) wrap + min_num_params (int, Optional): minimum number of layer params to wrap + """ + try: + from fairscale.nn import wrap + + if min_num_params is not None: + num_params = sum(p.numel() for p in module.parameters()) + if num_params >= min_num_params: + return wrap(module, **kwargs) + else: + return module + else: + return wrap(module, **kwargs) + except ImportError: + return module + +def quant_noise(module, p, block_size): + """ + Wraps modules and applies quantization noise to the weights for + subsequent quantization with Iterative Product Quantization as + described in "Training with Quantization Noise for Extreme Model Compression" + + Args: + - module: nn.Module + - p: amount of Quantization Noise + - block_size: size of the blocks for subsequent quantization with iPQ + + Remarks: + - Module weights must have the right sizes wrt the block size + - Only Linear, Embedding and Conv2d modules are supported for the moment + - For more detail on how to quantize by blocks with convolutional weights, + see "And the Bit Goes Down: Revisiting the Quantization of Neural Networks" + - We implement the simplest form of noise here as stated in the paper + which consists in randomly dropping blocks + """ + + # if no quantization noise, don't register hook + if p <= 0: + return module + + # supported modules + assert isinstance(module, (nn.Linear, nn.Embedding, nn.Conv2d)) + + # test whether module.weight has the right sizes wrt block_size + is_conv = module.weight.ndim == 4 + + # 2D matrix + if not is_conv: + assert ( + module.weight.size(1) % block_size == 0 + ), "Input features must be a multiple of block sizes" + + # 4D matrix + else: + # 1x1 convolutions + if module.kernel_size == (1, 1): + assert ( + module.in_channels % block_size == 0 + ), "Input channels must be a multiple of block sizes" + # regular convolutions + else: + k = module.kernel_size[0] * module.kernel_size[1] + assert k % block_size == 0, "Kernel size must be a multiple of block size" + + def _forward_pre_hook(mod, input): + # no noise for evaluation + if mod.training: + if not is_conv: + # gather weight and sizes + weight = mod.weight + in_features = weight.size(1) + out_features = weight.size(0) + + # split weight matrix into blocks and randomly drop selected blocks + mask = torch.zeros( + in_features // block_size * out_features, device=weight.device + ) + mask.bernoulli_(p) + mask = mask.repeat_interleave(block_size, -1).view(-1, in_features) + + else: + # gather weight and sizes + weight = mod.weight + in_channels = mod.in_channels + out_channels = mod.out_channels + + # split weight matrix into blocks and randomly drop selected blocks + if mod.kernel_size == (1, 1): + mask = torch.zeros( + int(in_channels // block_size * out_channels), + device=weight.device, + ) + mask.bernoulli_(p) + mask = mask.repeat_interleave(block_size, -1).view(-1, in_channels) + else: + mask = torch.zeros( + weight.size(0), weight.size(1), device=weight.device + ) + mask.bernoulli_(p) + mask = ( + mask.unsqueeze(2) + .unsqueeze(3) + .repeat(1, 1, mod.kernel_size[0], mod.kernel_size[1]) + ) + + # scale weights 
and apply mask
+            mask = mask.to(
+                torch.bool
+            )  # x.bool() is not currently supported in TorchScript
+            s = 1 / (1 - p)
+            mod.weight.data = s * weight.masked_fill(mask, 0)
+
+    module.register_forward_pre_hook(_forward_pre_hook)
+    return module
+
+def relu_squared(x: torch.Tensor):
+    return F.relu(x).pow(2)
+
+def gelu(x: torch.Tensor) -> torch.Tensor:
+    return torch.nn.functional.gelu(x.float()).type_as(x)
+
+def gelu_accurate(x):
+    if not hasattr(gelu_accurate, "_a"):
+        gelu_accurate._a = math.sqrt(2 / math.pi)
+    return (
+        0.5 * x * (1 + torch.tanh(gelu_accurate._a * (x + 0.044715 * torch.pow(x, 3))))
+    )
+
+def get_activation_fn(activation: str) -> Callable:
+    """Returns the activation function corresponding to `activation`"""
+    if activation == "relu":
+        return F.relu
+    elif activation == "relu_squared":
+        return relu_squared
+    elif activation == "gelu":
+        return gelu
+    elif activation == "gelu_fast":
+        logger.warning(
+            "--activation-fn=gelu_fast has been renamed to gelu_accurate"
+        )
+        return gelu_accurate
+    elif activation == "gelu_accurate":
+        return gelu_accurate
+    elif activation == "tanh":
+        return torch.tanh
+    elif activation == "linear":
+        return lambda x: x
+    elif activation == "swish":
+        # return an instance so the result can be applied to tensors like the other activations
+        return torch.nn.SiLU()
+    else:
+        raise RuntimeError("--activation-fn {} not supported".format(activation))
+
+def softmax(x, dim: int, onnx_trace: bool = False):
+    if onnx_trace:
+        return F.softmax(x.float(), dim=dim)
+    else:
+        return F.softmax(x, dim=dim, dtype=torch.float32)
+
+def compute_mask_indices(
+    shape: Tuple[int, int],
+    padding_mask: Optional[torch.Tensor],
+    mask_prob: float,
+    mask_length: int,
+    mask_type: str = "static",
+    mask_other: float = 0.0,
+    min_masks: int = 0,
+    no_overlap: bool = False,
+    min_space: int = 0,
+    require_same_masks: bool = True,
+    mask_dropout: float = 0.0,
+) -> np.ndarray:
+    """
+    Computes random mask spans for a given shape
+
+    Args:
+        shape: the shape for which to compute masks.
+            should be of size 2 where first element is batch size and 2nd is timesteps
+        padding_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
+        mask_prob: probability for each token to be chosen as start of the span to be masked. this will be multiplied by
+            number of timesteps divided by length of mask span to mask approximately this percentage of all elements.
+            however due to overlaps, the actual number will be smaller (unless no_overlap is True)
+        mask_type: how to compute mask lengths
+            static = fixed size
+            uniform = sample from uniform distribution [mask_other, mask_length*2]
+            normal = sample from normal distribution with mean mask_length and stdev mask_other. mask is min 1 element
+            poisson = sample from Poisson distribution with lambda = mask length
+        min_masks: minimum number of masked spans
+        no_overlap: if true, will switch to an alternative recursive algorithm that prevents spans from overlapping
+        min_space: only used if no_overlap is True, this is how many elements to keep unmasked between spans
+        require_same_masks: if true, will randomly drop out masks until the same number of masks remains in each sample
+        mask_dropout: randomly drop out this percentage of masks in each example
+    """
+
+    bsz, all_sz = shape
+    mask = np.full((bsz, all_sz), False)
+
+    all_num_mask = int(
+        # add a random number for probabilistic rounding
+        mask_prob * all_sz / float(mask_length)
+        + np.random.rand()
+    )
+
+    all_num_mask = max(min_masks, all_num_mask)
+
+    mask_idcs = []
+    for i in range(bsz):
+        if padding_mask is not None:
+            sz = all_sz - padding_mask[i].long().sum().item()
+            num_mask = int(
+                # add a random number for probabilistic rounding
+                mask_prob * sz / float(mask_length)
+                + np.random.rand()
+            )
+            num_mask = max(min_masks, num_mask)
+        else:
+            sz = all_sz
+            num_mask = all_num_mask
+
+        if mask_type == "static":
+            lengths = np.full(num_mask, mask_length)
+        elif mask_type == "uniform":
+            lengths = np.random.randint(mask_other, mask_length * 2 + 1, size=num_mask)
+        elif mask_type == "normal":
+            lengths = np.random.normal(mask_length, mask_other, size=num_mask)
+            lengths = [max(1, int(round(x))) for x in lengths]
+        elif mask_type == "poisson":
+            lengths = np.random.poisson(mask_length, size=num_mask)
+            lengths = [int(round(x)) for x in lengths]
+        else:
+            raise Exception("unknown mask selection " + mask_type)
+
+        if sum(lengths) == 0:
+            lengths[0] = min(mask_length, sz - 1)
+
+        if no_overlap:
+            mask_idc = []
+
+            def arrange(s, e, length, keep_length):
+                span_start = np.random.randint(s, e - length)
+                mask_idc.extend(span_start + i for i in range(length))
+
+                new_parts = []
+                if span_start - s - min_space >= keep_length:
+                    new_parts.append((s, span_start - min_space + 1))
+                if e - span_start - keep_length - min_space > keep_length:
+                    new_parts.append((span_start + length + min_space, e))
+                return new_parts
+
+            parts = [(0, sz)]
+            min_length = min(lengths)
+            for length in sorted(lengths, reverse=True):
+                lens = np.fromiter(
+                    (e - s if e - s >= length + min_space else 0 for s, e in parts),
+                    int,  # np.int was removed in recent NumPy; the builtin int is equivalent here
+                )
+                l_sum = np.sum(lens)
+                if l_sum == 0:
+                    break
+                probs = lens / np.sum(lens)
+                c = np.random.choice(len(parts), p=probs)
+                s, e = parts.pop(c)
+                parts.extend(arrange(s, e, length, min_length))
+            mask_idc = np.asarray(mask_idc)
+        else:
+            min_len = min(lengths)
+            if sz - min_len <= num_mask:
+                min_len = sz - num_mask - 1
+
+            mask_idc = np.random.choice(sz - min_len, num_mask, replace=False)
+
+            mask_idc = np.asarray(
+                [
+                    mask_idc[j] + offset
+                    for j in range(len(mask_idc))
+                    for offset in range(lengths[j])
+                ]
+            )
+
+        mask_idcs.append(np.unique(mask_idc[mask_idc < sz]))
+
+    min_len = min([len(m) for m in mask_idcs])
+    for i, mask_idc in enumerate(mask_idcs):
+        if len(mask_idc) > min_len and require_same_masks:
+            mask_idc = np.random.choice(mask_idc, min_len, replace=False)
+        if mask_dropout > 0:
+            num_holes = np.rint(len(mask_idc) * mask_dropout).astype(int)
+            mask_idc = np.random.choice(
+                mask_idc, len(mask_idc) - num_holes, replace=False
+            )
+
+        mask[i, mask_idc] = True
+
+    return mask
+
+def init_bert_params(module):
+    """
+    Initialize the weights specific to the BERT Model.
+ This overrides the default initializations depending on the specified arguments. + 1. If normal_init_linear_weights is set then weights of linear + layer will be initialized using the normal distribution and + bais will be set to the specified value. + 2. If normal_init_embed_weights is set then weights of embedding + layer will be initialized using the normal distribution. + 3. If normal_init_proj_weights is set then weights of + in_project_weight for MultiHeadAttention initialized using + the normal distribution (to be validated). + """ + + def normal_(data): + # with FSDP, module params will be on CUDA, so we cast them back to CPU + # so that the RNG is consistent with and without FSDP + data.copy_(data.cpu().normal_(mean=0.0, std=0.02).to(data.device)) + + if isinstance(module, nn.Linear): + normal_(module.weight.data) + if module.bias is not None: + module.bias.data.zero_() + if isinstance(module, nn.Embedding): + normal_(module.weight.data) + if module.padding_idx is not None: + module.weight.data[module.padding_idx].zero_() + if isinstance(module, MultiheadAttention): + normal_(module.q_proj.weight.data) + normal_(module.k_proj.weight.data) + normal_(module.v_proj.weight.data) + +def pad_to_multiple(x, multiple, dim=-1, value=0): + # Inspired from https://github.com/lucidrains/local-attention/blob/master/local_attention/local_attention.py#L41 + if x is None: + return None, 0 + tsz = x.size(dim) + m = tsz / multiple + remainder = math.ceil(m) * multiple - tsz + if m.is_integer(): + return x, 0 + pad_offset = (0,) * (-1 - dim) * 2 + + return F.pad(x, (*pad_offset, 0, remainder), value=value), remainder + +def is_xla_tensor(tensor): + return torch.is_tensor(tensor) and tensor.device.type == "xla" + +def index_put(tensor, indices, value): + if is_xla_tensor(tensor): + for _ in range(indices.dim(), tensor.dim()): + indices = indices.unsqueeze(-1) + if indices.size(-1) < tensor.size(-1): + indices = indices.expand_as(tensor) + tensor = torch.mul(tensor, ~indices) + torch.mul(value, indices) + else: + tensor[indices] = value + return tensor + +def PositionalEmbedding( + num_embeddings: int, + embedding_dim: int, + padding_idx: int, + learned: bool = False, +): + if learned: + # if padding_idx is specified then offset the embedding ids by + # this index and adjust num_embeddings appropriately + # TODO: The right place for this offset would be inside + # LearnedPositionalEmbedding. Move this there for a cleaner implementation. + if padding_idx is not None: + num_embeddings = num_embeddings + padding_idx + 1 + m = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5) + if padding_idx is not None: + nn.init.constant_(m.weight[padding_idx], 0) + else: + m = SinusoidalPositionalEmbedding( + embedding_dim, + padding_idx, + init_size=num_embeddings + padding_idx + 1, + ) + return m + +def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): + if torch.jit.is_scripting() or torch.jit.is_tracing(): + export = True + if not export and torch.cuda.is_available() and has_fused_layernorm: + return FusedLayerNorm(normalized_shape, eps, elementwise_affine) + return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine) + + +class TransformerEncoderBase(nn.Module): + """ + Transformer encoder consisting of *cfg.encoder.layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary: deprecated(None) + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, cfg, dictionary, embed_tokens, use_rel_pos_enc=False, scaling_for_att=1.0): + self.cfg = cfg + super().__init__() + self.register_buffer("version", torch.Tensor([3])) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.encoder_layerdrop = cfg.encoder.layerdrop + + embed_dim = embed_tokens.embedding_dim if embed_tokens is not None else cfg.encoder.embed_dim + self.padding_idx = embed_tokens.padding_idx if embed_tokens is not None else 1 + self.max_source_positions = cfg.max_source_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + self.embed_positions = ( + PositionalEmbedding( + cfg.max_source_positions, + embed_dim, + self.padding_idx, + learned=cfg.encoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = quant_noise( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + if self.encoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.encoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.use_rel_pos_enc = use_rel_pos_enc + self.scaling_for_att = scaling_for_att + self.layers.extend( + [self.build_encoder_layer(cfg) for i in range(cfg.encoder.layers)] + ) + self.num_layers = len(self.layers) + + if cfg.encoder.normalize_before: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(embed_dim // cfg.encoder.attention_heads, 160) + + def build_encoder_layer(self, cfg): + layer = TransformerEncoderLayerBase(cfg, has_relative_attention_bias=self.use_rel_pos_enc, scaling_for_att=self.scaling_for_att) + checkpoint = cfg.checkpoint_activations + if checkpoint: + raise ValueError("We don't support checkpoint_activations for now! 
Please set cfg.checkpoint_activations=False.") + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward_embedding( + self, src_tokens, token_embedding: Optional[torch.Tensor] = None + ): + # embed tokens and positions + if token_embedding is None: + token_embedding = self.embed_tokens(src_tokens) + x = embed = self.embed_scale * token_embedding + if self.embed_positions is not None: + x = embed + self.embed_positions(src_tokens) + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + x = self.dropout_module(x) + if self.quant_noise is not None: + x = self.quant_noise(x) + return x, embed + + def forward( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + uniformity_layers: Optional[List[int]] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. + """ + return self.forward_scriptable( + src_tokens, src_lengths, return_all_hiddens, token_embeddings, uniformity_layers + ) + + # TorchScript doesn't support super() method so that the scriptable Subclass + # can't access the base class model in Torchscript. + # Current workaround is to add a helper function with different name and + # call the helper function from scriptable Subclass. + def forward_scriptable( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + uniformity_layers: Optional[List[int]] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. 
+ """ + # compute padding mask + encoder_padding_mask = src_tokens.eq(self.padding_idx) + has_pads = src_tokens.device.type == "xla" or encoder_padding_mask.any() + + x, encoder_embedding = self.forward_embedding(src_tokens, token_embeddings) + + # account for padding while computing the representation + if has_pads: + x = x * (1 - encoder_padding_mask.unsqueeze(-1).type_as(x)) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + encoder_states = [] + uniformity_hiddens = [] + + if return_all_hiddens: + encoder_states.append(x) + + if uniformity_layers is not None and 0 in uniformity_layers: + x = F.normalize(x.float(), dim=-1).type_as(x) + uniformity_hiddens.append(x) + + # encoder layers + for i, layer in enumerate(self.layers): + x = layer( + x, encoder_padding_mask=encoder_padding_mask if has_pads else None, + pos_bias=pos_k, + ) + if uniformity_layers is not None and i+1 in uniformity_layers: + x = F.normalize(x.float(), dim=-1).type_as(x) + uniformity_hiddens.append(x) + if return_all_hiddens: + assert encoder_states is not None + encoder_states.append(x) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # The Pytorch Mobile lite interpreter does not supports returning NamedTuple in + # `forward` so we use a dictionary instead. + # TorchScript does not support mixed values so the values are all lists. + # The empty list is equivalent to None. + src_lengths = ( + src_tokens.ne(self.padding_idx) + .sum(dim=1, dtype=torch.int32) + .reshape(-1, 1) + .contiguous() + ) + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [encoder_padding_mask], # B x T + "encoder_embedding": [encoder_embedding], # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "uniformity_hiddens": uniformity_hiddens, # List[T x B x C] + "src_tokens": [], + "src_lengths": [src_lengths], + } + + def forward_torchscript(self, net_input: Dict[str, Tensor]): + """A TorchScript-compatible version of forward. + + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. + """ + if torch.jit.is_scripting(): + return self.forward( + src_tokens=net_input["src_tokens"], + src_lengths=net_input["src_lengths"], + ) + else: + return self.forward_non_torchscript(net_input) + + @torch.jit.unused + def forward_non_torchscript(self, net_input: Dict[str, Tensor]): + encoder_input = { + k: v for k, v in net_input.items() if k != "prev_output_tokens" + } + return self.forward(**encoder_input) + + @torch.jit.export + def reorder_encoder_out(self, encoder_out: Dict[str, List[Tensor]], new_order): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if len(encoder_out["encoder_out"]) == 0: + new_encoder_out = [] + else: + new_encoder_out = [encoder_out["encoder_out"][0].index_select(1, new_order)] + if len(encoder_out["encoder_padding_mask"]) == 0: + new_encoder_padding_mask = [] + else: + new_encoder_padding_mask = [ + encoder_out["encoder_padding_mask"][0].index_select(0, new_order) + ] + if len(encoder_out["encoder_embedding"]) == 0: + new_encoder_embedding = [] + else: + new_encoder_embedding = [ + encoder_out["encoder_embedding"][0].index_select(0, new_order) + ] + + if len(encoder_out["src_tokens"]) == 0: + src_tokens = [] + else: + src_tokens = [(encoder_out["src_tokens"][0]).index_select(0, new_order)] + + if len(encoder_out["src_lengths"]) == 0: + src_lengths = [] + else: + src_lengths = [(encoder_out["src_lengths"][0]).index_select(0, new_order)] + + encoder_states = encoder_out["encoder_states"] + if len(encoder_states) > 0: + for idx, state in enumerate(encoder_states): + encoder_states[idx] = state.index_select(1, new_order) + + return { + "encoder_out": new_encoder_out, # T x B x C + "encoder_padding_mask": new_encoder_padding_mask, # B x T + "encoder_embedding": new_encoder_embedding, # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": src_tokens, # B x T + "src_lengths": src_lengths, # B x 1 + } + + def max_positions(self): + """Maximum input length supported by the encoder.""" + if self.embed_positions is None: + return self.max_source_positions + return min(self.max_source_positions, self.embed_positions.max_positions) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + print("deleting {0}".format(weights_key)) + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + for i in range(self.num_layers): + # update layer norms + self.layers[i].upgrade_state_dict_named( + state_dict, "{}.layers.{}".format(name, i) + ) + + version_key = "{}.version".format(name) + if utils_item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + return state_dict + + def set_num_updates(self, num_updates): + """State from trainer to pass along to model at every update.""" + + def _apply(m): + if hasattr(m, "set_num_updates") and m != self: + m.set_num_updates(num_updates) + + self.apply(_apply) + + +class TransformerEncoderLayerBase(nn.Module): + """Encoder layer block. + + In the original paper each operation (multi-head attention or FFN) is + postprocessed with: `dropout -> add residual -> layernorm`. In the + tensor2tensor code they suggest that learning is more robust when + preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.encoder.normalize_before* to ``True``. 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + """ + + def __init__(self, cfg, has_relative_attention_bias=False, scaling_for_att=1.0): + super().__init__() + self.cfg = cfg + self.embed_dim = cfg.encoder.embed_dim + self.quant_noise = cfg.quant_noise.pq + self.quant_noise_block_size = cfg.quant_noise.pq_block_size + self.self_attn = self.build_self_attention(self.embed_dim, cfg, has_relative_attention_bias=has_relative_attention_bias, scaling_for_att=scaling_for_att) + self.self_attn_layer_norm = LayerNorm(self.embed_dim, export=cfg.export) + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=self.__class__.__name__ + ) + self.activation_fn = get_activation_fn(activation=cfg.activation_fn) + activation_dropout_p = cfg.activation_dropout + if activation_dropout_p == 0: + # for backwards compatibility with models that use cfg.relu_dropout + activation_dropout_p = cfg.relu_dropout or 0 + self.activation_dropout_module = FairseqDropout( + float(activation_dropout_p), module_name=self.__class__.__name__ + ) + self.normalize_before = cfg.encoder.normalize_before + self.fc1 = self.build_fc1( + self.embed_dim, + cfg.encoder.ffn_embed_dim, + self.quant_noise, + self.quant_noise_block_size, + ) + self.fc2 = self.build_fc2( + cfg.encoder.ffn_embed_dim, + self.embed_dim, + self.quant_noise, + self.quant_noise_block_size, + ) + + self.final_layer_norm = LayerNorm(self.embed_dim, export=cfg.export) + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim // cfg.encoder.attention_heads) + + def build_fc1(self, input_dim, output_dim, q_noise, qn_block_size): + return quant_noise( + nn.Linear(input_dim, output_dim), p=q_noise, block_size=qn_block_size + ) + + def build_fc2(self, input_dim, output_dim, q_noise, qn_block_size): + return quant_noise( + nn.Linear(input_dim, output_dim), p=q_noise, block_size=qn_block_size + ) + + def build_self_attention(self, embed_dim, cfg, has_relative_attention_bias=False, scaling_for_att=1.0): + return MultiheadAttention( + embed_dim, + cfg.encoder.attention_heads, + dropout=cfg.attention_dropout, + self_attention=True, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + has_relative_attention_bias=has_relative_attention_bias, + scaling_for_att=scaling_for_att, + ) + + def residual_connection(self, x, residual): + return residual + x + + def upgrade_state_dict_named(self, state_dict, name): + """ + Rename layer norm states from `...layer_norms.0.weight` to + `...self_attn_layer_norm.weight` and `...layer_norms.1.weight` to + `...final_layer_norm.weight` + """ + layer_norm_map = {"0": "self_attn_layer_norm", "1": "final_layer_norm"} + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layer_norms.{}.{}".format(name, old, m) + if k in state_dict: + state_dict["{}.{}.{}".format(name, new, m)] = state_dict[k] + del state_dict[k] + + def forward( + self, + x, + encoder_padding_mask: Optional[Tensor], + attn_mask: Optional[Tensor] = None, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, seq_len)` where padding elements are indicated by ``1``. + attn_mask (ByteTensor): binary tensor of shape `(tgt_len, src_len)`, + where `tgt_len` is the length of output and `src_len` is the + length of input, though here both are equal to `seq_len`. 
+ `attn_mask[tgt_i, src_j] = 1` means that when calculating the + embedding for `tgt_i`, we exclude (mask out) `src_j`. This is + useful for strided self-attention. + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + # anything in original attn_mask = 1, becomes -1e8 + # anything in original attn_mask = 0, becomes 0 + # Note that we cannot use -inf here, because at some edge cases, + # the attention weight (before softmax) for some padded element in query + # will become -inf, which results in NaN in model parameters + if attn_mask is not None: + attn_mask = attn_mask.masked_fill( + attn_mask.to(torch.bool), -1e8 if x.dtype == torch.float32 else -1e4 + ) + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, _ = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=encoder_padding_mask, + need_weights=False, + attn_mask=attn_mask, + position_bias=pos_bias, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + x = self.fc2(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + return x + + +class TransformerEncoder(nn.Module): + """ + wav2vec-style transformer encoder. + """ + def __init__(self, args): + super().__init__() + + self.dropout = args.dropout + self.embedding_dim = args.encoder_embed_dim + self.required_seq_len_multiple = args.required_seq_len_multiple + + self.pos_conv = nn.Conv1d( + self.embedding_dim, + self.embedding_dim, + kernel_size=args.conv_pos, + padding=args.conv_pos // 2, + groups=args.conv_pos_groups, + ) + dropout = 0 + std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * self.embedding_dim)) + nn.init.normal_(self.pos_conv.weight, mean=0, std=std) + nn.init.constant_(self.pos_conv.bias, 0) + + self.pos_conv = nn.utils.weight_norm(self.pos_conv, name="weight", dim=2) + self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU()) + + layers = [] + self.use_rel_pos_enc = getattr(args, "use_rel_pos_enc", False) + for _ in range(args.encoder_layers): + layer = TransformerSentenceEncoderLayer( + embedding_dim=self.embedding_dim, + ffn_embedding_dim=args.encoder_ffn_embed_dim, + num_attention_heads=args.encoder_attention_heads, + dropout=self.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + layer_norm_first=args.layer_norm_first, + has_relative_attention_bias=self.use_rel_pos_enc, + scaling_for_att=getattr(args, "scaling_for_att", 1.0) + ) + if args.checkpoint_activations: + raise ValueError("We don't support checkpoint_activations for now! 
Please set checkpoint_activations=False.") + layers.append(layer) + self.layers = nn.ModuleList(layers) + + self.layer_norm_first = args.layer_norm_first + self.layer_norm = LayerNorm(self.embedding_dim) + self.layerdrop = args.encoder_layerdrop + + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(args.encoder_embed_dim // args.encoder_attention_heads, 160) + + self.apply(init_bert_params) + + def forward(self, x, padding_mask=None, layer=None, conv_pos=True): + x, layer_results = self.extract_features(x, padding_mask, layer, conv_pos) + + if self.layer_norm_first and (layer is None or layer >= len(self.layers) - 1): + x = self.layer_norm(x) + + return x, layer_results + + def extract_features(self, x, padding_mask=None, tgt_layer=None, conv_pos=True): + + if padding_mask is not None: + x = index_put(x, padding_mask, 0) + + if conv_pos: + x_conv = self.pos_conv(x.transpose(1, 2)) + x_conv = x_conv.transpose(1, 2) + x = x + x_conv + + if not self.layer_norm_first: + x = self.layer_norm(x) + + # pad to the sequence length dimension + x, pad_length = pad_to_multiple( + x, self.required_seq_len_multiple, dim=-2, value=0 + ) + if pad_length > 0 and padding_mask is None: + padding_mask = x.new_zeros((x.size(0), x.size(1)), dtype=torch.bool) + padding_mask[:, -pad_length:] = True + else: + padding_mask, _ = pad_to_multiple( + padding_mask, self.required_seq_len_multiple, dim=-1, value=True + ) + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + if self.use_rel_pos_enc: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + layer_results = [] + r = None + for i, layer in enumerate(self.layers): + dropout_probability = np.random.random() + if not self.training or (dropout_probability > self.layerdrop): + x, z = layer(x, self_attn_padding_mask=padding_mask, need_weights=False, pos_bias=pos_k) + if tgt_layer is not None: + # unpad if needed + if pad_length > 0: + layer_results.append( + x[:-pad_length] + # ( + # x[:-pad_length], + # z[:, :-pad_length, :-pad_length] + # if z is not None + # else z, + # ) + ) + else: + # layer_results.append((x, z)) + layer_results.append(x) + if i == tgt_layer: + r = x + break + + if r is not None: + x = r + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + # undo paddding + if pad_length > 0: + x = x[:, :-pad_length] + + return x, layer_results + + def max_positions(self): + """Maximum output length supported by the encoder.""" + return self.args.max_positions + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + return state_dict + + +class TransformerSentenceEncoderLayer(nn.Module): + """ + wav2vec-style transformer layer + """ + + def __init__( + self, + embedding_dim: float = 768, + ffn_embedding_dim: float = 3072, + num_attention_heads: float = 8, + dropout: float = 0.1, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + activation_fn: str = "relu", + layer_norm_first: bool = False, + has_relative_attention_bias: bool = False, + scaling_for_att: float = 1.0, + ) -> None: + + super().__init__() + # Initialize parameters + self.embedding_dim = embedding_dim + self.dropout = dropout + self.activation_dropout = activation_dropout + + # Initialize blocks + self.activation_fn = get_activation_fn(activation_fn) + self.self_attn = MultiheadAttention( + 
self.embedding_dim, + num_attention_heads, + dropout=attention_dropout, + self_attention=True, + has_relative_attention_bias=has_relative_attention_bias, + scaling_for_att=scaling_for_att + ) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(self.activation_dropout) + self.dropout3 = nn.Dropout(dropout) + + self.layer_norm_first = layer_norm_first + + # layer norm associated with the self attention layer + self.self_attn_layer_norm = LayerNorm(self.embedding_dim) + self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim) + self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim) + + # layer norm associated with the position wise feed-forward NN + self.final_layer_norm = LayerNorm(self.embedding_dim) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embedding_dim//num_attention_heads) + + def forward( + self, + x: torch.Tensor, + self_attn_mask: torch.Tensor = None, + self_attn_padding_mask: torch.Tensor = None, + need_weights: bool = False, + att_args=None, + pos_bias=None, + ): + """ + LayerNorm is applied either before or after the self-attention/ffn + modules similar to the original Transformer imlementation. + """ + residual = x + + if self.layer_norm_first: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + x = self.dropout1(x) + x = residual + x + + residual = x + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + else: + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + position_bias=pos_bias, + ) + + x = self.dropout1(x) + x = residual + x + + x = self.self_attn_layer_norm(x) + + residual = x + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + x = self.final_layer_norm(x) + + return x, attn + + +class FairseqDropout(nn.Module): + def __init__(self, p, module_name=None): + super().__init__() + self.p = p + self.module_name = module_name + self.apply_during_inference = False + + def forward(self, x, inplace: bool = False): + if self.p > 0 and (self.training or self.apply_during_inference): + return F.dropout(x, p=self.p, training=True, inplace=inplace) + else: + return x + + def make_generation_fast_( + self, + name: str, + retain_dropout: bool = False, + retain_dropout_modules: Optional[List[str]] = None, + **kwargs + ): + if retain_dropout: + if retain_dropout_modules is not None and self.module_name is None: + logger.warning( + "Cannot enable dropout during inference for module {} " + "because module_name was not set".format(name) + ) + elif ( + retain_dropout_modules is None # if None, apply to all modules + or self.module_name in retain_dropout_modules + ): + logger.info( + "Enabling dropout during inference for module: {}".format(name) + ) + self.apply_during_inference = True + else: + logger.info("Disabling dropout for module: {}".format(name)) + + +class LearnedPositionalEmbedding(nn.Embedding): + """ + This module learns positional embeddings up to a fixed maximum size. + Padding ids are ignored by either offsetting based on padding_idx + or by setting padding_idx to None and ensuring that the appropriate + position ids are passed to the forward function. 
+ """ + + def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: int): + super().__init__(num_embeddings, embedding_dim, padding_idx) + self.onnx_trace = False + if self.padding_idx is not None: + self.max_positions = self.num_embeddings - self.padding_idx - 1 + else: + self.max_positions = self.num_embeddings + + def forward( + self, + input: Tensor, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + positions: Optional[Tensor] = None, + ): + """Input is expected to be of size [bsz x seqlen].""" + assert (positions is None) or ( + self.padding_idx is None + ), "If positions is pre-computed then padding_idx should not be set." + + if positions is None: + if incremental_state is not None: + # positions is the same for every token when decoding a single step + # Without the int() cast, it doesn't work in some cases when exporting to ONNX + positions = torch.zeros( + (1, 1), device=input.device, dtype=input.dtype + ).fill_(int(self.padding_idx + input.size(1))) + else: + positions = utils_make_positions( + input, self.padding_idx, onnx_trace=self.onnx_trace + ) + positions = torch.clamp(positions, max=self.padding_idx + self.max_positions) + return F.embedding( + positions, + self.weight, + self.padding_idx, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.sparse, + ) + + +class SinusoidalPositionalEmbedding(nn.Module): + """This module produces sinusoidal positional embeddings of any length. + + Padding symbols are ignored. + """ + + def __init__(self, embedding_dim, padding_idx, init_size=1024): + super().__init__() + self.embedding_dim = embedding_dim + self.padding_idx = padding_idx if padding_idx is not None else 0 + self.weights = SinusoidalPositionalEmbedding.get_embedding( + init_size, embedding_dim, padding_idx + ) + self.onnx_trace = False + self.register_buffer("_float_tensor", torch.FloatTensor(1)) + self.max_positions = int(1e5) + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + @staticmethod + def get_embedding( + num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None + ): + """Build sinusoidal embeddings. + + This matches the implementation in tensor2tensor, but differs slightly + from the description in Section 3.5 of "Attention Is All You Need". 
+ """ + half_dim = embedding_dim // 2 + emb = math.log(10000) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb) + emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze( + 1 + ) * emb.unsqueeze(0) + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view( + num_embeddings, -1 + ) + if embedding_dim % 2 == 1: + # zero pad + emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1) + if padding_idx is not None: + emb[padding_idx, :] = 0 + return emb + + def forward( + self, + input, + incremental_state: Optional[Any] = None, + timestep: Optional[Tensor] = None, + positions: Optional[Any] = None, + ): + """Input is expected to be of size [bsz x seqlen].""" + bspair = torch.onnx.operators.shape_as_tensor(input) + bsz, seq_len = bspair[0], bspair[1] + max_pos = self.padding_idx + 1 + seq_len + if self.weights is None or max_pos > self.weights.size(0): + # recompute/expand embeddings if needed + self.weights = SinusoidalPositionalEmbedding.get_embedding( + max_pos, self.embedding_dim, self.padding_idx + ) + self.weights = self.weights.to(self._float_tensor) + + if incremental_state is not None: + # positions is the same for every token when decoding a single step + pos = timestep.view(-1)[0] + 1 if timestep is not None else seq_len + if self.onnx_trace: + return ( + self.weights.index_select(index=self.padding_idx + pos, dim=0) + .unsqueeze(1) + .repeat(bsz, 1, 1) + ) + return self.weights[self.padding_idx + pos, :].expand(bsz, 1, -1) + + positions = utils_make_positions( + input, self.padding_idx, onnx_trace=self.onnx_trace + ) + if self.onnx_trace: + flat_embeddings = self.weights.detach().index_select(0, positions.view(-1)) + embedding_shape = torch.cat( + (bsz.view(1), seq_len.view(1), torch.tensor([-1], dtype=torch.long)) + ) + embeddings = torch.onnx.operators.reshape_from_tensor_shape( + flat_embeddings, embedding_shape + ) + return embeddings + return ( + self.weights.index_select(0, positions.view(-1)) + .view(bsz, seq_len, -1) + .detach() + ) + + +try: + from apex.normalization import FusedLayerNorm as _FusedLayerNorm + + has_fused_layernorm = True + + class FusedLayerNorm(_FusedLayerNorm): + @torch.jit.unused + def forward(self, x): + if not x.is_cuda: + return super().forward(x) + else: + with torch.cuda.device(x.device): + return super().forward(x) + +except ImportError: + has_fused_layernorm = False + + +class Fp32LayerNorm(nn.LayerNorm): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def forward(self, input): + output = F.layer_norm( + input.float(), + self.normalized_shape, + self.weight.float() if self.weight is not None else None, + self.bias.float() if self.bias is not None else None, + self.eps, + ) + return output.type_as(input) + + +class LayerDropModuleList(nn.ModuleList): + """ + A LayerDrop implementation based on :class:`torch.nn.ModuleList`. + + We refresh the choice of which layers to drop every time we iterate + over the LayerDropModuleList instance. During evaluation we always + iterate over all layers. 
+ + Usage:: + + layers = LayerDropList(p=0.5, modules=[layer1, layer2, layer3]) + for layer in layers: # this might iterate over layers 1 and 3 + x = layer(x) + for layer in layers: # this might iterate over all layers + x = layer(x) + for layer in layers: # this might not iterate over any layers + x = layer(x) + + Args: + p (float): probability of dropping out each layer + modules (iterable, optional): an iterable of modules to add + """ + + def __init__(self, p, modules=None): + super().__init__(modules) + self.p = p + + def __iter__(self): + dropout_probs = torch.empty(len(self)).uniform_() + for i, m in enumerate(super().__iter__()): + if not self.training or (dropout_probs[i] > self.p): + yield m + + +class RelativePositionalEncoding(torch.nn.Module): + def __init__(self, d_model, maxlen=1000, embed_v=False): + super(RelativePositionalEncoding, self).__init__() + + self.d_model = d_model + self.maxlen = maxlen + self.pe_k = torch.nn.Embedding(2*maxlen, d_model) + if embed_v: + self.pe_v = torch.nn.Embedding(2*maxlen, d_model) + self.embed_v = embed_v + + + def forward(self, pos_seq, incremental_state=None): + pos_seq[pos_seq < -self.maxlen] = -self.maxlen + pos_seq[pos_seq >= self.maxlen] = self.maxlen - 1 + pos_seq = pos_seq + self.maxlen + + if incremental_state is not None: + pos_seq = pos_seq[-1:] + + if self.embed_v: + return self.pe_k(pos_seq), self.pe_v(pos_seq) + else: + return self.pe_k(pos_seq), None + + +class MultiheadAttention(nn.Module): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. + """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + q_noise=0.0, + qn_block_size=8, + has_relative_attention_bias=False, + scaling_for_att=1.0 + ): + super().__init__() + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim + + self.num_heads = num_heads + self.dropout_module = FairseqDropout( + dropout, module_name=self.__class__.__name__ + ) + + self.has_relative_attention_bias = has_relative_attention_bias + + self.head_dim = embed_dim // num_heads + assert ( + self.head_dim * num_heads == self.embed_dim + ), "embed_dim must be divisible by num_heads" + self.scaling = self.head_dim ** -0.5 + self.scaling_for_att = scaling_for_att + + self.self_attention = self_attention + self.encoder_decoder_attention = encoder_decoder_attention + + assert not self.self_attention or self.qkv_same_dim, ( + "Self-attention requires query, key and " "value to be of the same size" + ) + + self.k_proj = quant_noise( + nn.Linear(self.kdim, embed_dim, bias=bias), q_noise, qn_block_size + ) + self.v_proj = quant_noise( + nn.Linear(self.vdim, embed_dim, bias=bias), q_noise, qn_block_size + ) + self.q_proj = quant_noise( + nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size + ) + + self.out_proj = quant_noise( + nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size + ) + + if add_bias_kv: + self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim)) + self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + self.reset_parameters() + + self.onnx_trace = False + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def reset_parameters(self): + if 
self.qkv_same_dim: + # Empirically observed the convergence to be much better with + # the scaled initialization + nn.init.xavier_uniform_(self.k_proj.weight, gain=1 / math.sqrt(2)) + nn.init.xavier_uniform_(self.v_proj.weight, gain=1 / math.sqrt(2)) + nn.init.xavier_uniform_(self.q_proj.weight, gain=1 / math.sqrt(2)) + else: + nn.init.xavier_uniform_(self.k_proj.weight) + nn.init.xavier_uniform_(self.v_proj.weight) + nn.init.xavier_uniform_(self.q_proj.weight) + + nn.init.xavier_uniform_(self.out_proj.weight) + if self.out_proj.bias is not None: + nn.init.constant_(self.out_proj.bias, 0.0) + if self.bias_k is not None: + nn.init.xavier_normal_(self.bias_k) + if self.bias_v is not None: + nn.init.xavier_normal_(self.bias_v) + + def forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + need_weights: bool = True, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + before_softmax: bool = False, + need_head_weights: bool = False, + position_bias: Optional[Tensor] = None + ) -> Tuple[Tensor, Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. + need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). + before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. + """ + if need_head_weights: + need_weights = True + + is_tpu = query.device.type == "xla" + + tgt_len, bsz, embed_dim = query.size() + src_len = tgt_len + assert embed_dim == self.embed_dim, f"query dim {embed_dim} != {self.embed_dim}" + assert list(query.size()) == [tgt_len, bsz, embed_dim] + if key is not None: + src_len, key_bsz, _ = key.size() + if not torch.jit.is_scripting(): + assert key_bsz == bsz + assert value is not None + assert src_len, bsz == value.shape[:2] + + if ( + not self.onnx_trace + and not is_tpu # don't use PyTorch version on TPUs + and incremental_state is None + and not static_kv + # A workaround for quantization to work. Otherwise JIT compilation + # treats bias in linear module as method. 
+ and not torch.jit.is_scripting() + and not self.has_relative_attention_bias + ): + assert key is not None and value is not None + return F.multi_head_attention_forward( + query, + key, + value, + self.embed_dim, + self.num_heads, + torch.empty([0]), + torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), + self.bias_k, + self.bias_v, + self.add_zero_attn, + self.dropout_module.p, + self.out_proj.weight, + self.out_proj.bias, + self.training or self.dropout_module.apply_during_inference, + key_padding_mask, + need_weights, + attn_mask, + use_separate_proj_weight=True, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + ) + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + q *= (1 / self.scaling_for_att) + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + key_padding_mask.new_zeros(key_padding_mask.size(0), 1), + ], + dim=1, + ) + + q = ( + q.contiguous() + .view(tgt_len, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if k is not None: + k = ( + k.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if v is not None: + v = ( + v.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + assert k is not None + k = torch.cat([prev_key, k], dim=1) + src_len = k.size(1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + + saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) 
+ saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + assert k.size(1) == src_len + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. + if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + assert v is not None + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + torch.zeros(key_padding_mask.size(0), 1).type_as( + key_padding_mask + ), + ], + dim=1, + ) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + if position_bias is not None: ## first order + ## position_bias: [241, 241, 64] + #print ("attn_weights: ", attn_weights.size()) # [492, 241, 241] + reshape_q = q.contiguous().view(bsz * self.num_heads, -1, self.head_dim).transpose(0,1) #[241, 492, 64] + #print ("reshape_q: ", reshape_q.size()) + B = torch.matmul(reshape_q, position_bias.transpose(-2, -1)) + #print ("B: ", B.size()) ## [241, 492, 241] + #B = B.transpose(0, 1).view(bsz, self.num_heads, position_bias.size(0), position_bias.size(1)) + B = B.transpose(0, 1).view(bsz*self.num_heads, position_bias.size(0), position_bias.size(1)) + #print ("B 2: ", B.size()) + attn_weights += B + + attn_weights *= self.scaling_for_att + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if not is_tpu: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if self.scaling_for_att > 1.0: + attn_weights = attn_weights - attn_weights.detach().max(dim=-1, keepdim=True)[0] + + if before_softmax: + return attn_weights, v + + attn_weights_float = softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace + ) + attn_weights = attn_weights_float.type_as(attn_weights) + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + if self.onnx_trace and attn.size(1) == 1: + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, embed_dim) + else: + attn = 
attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn = self.out_proj(attn) + attn_weights: Optional[Tensor] = None + if need_weights: + attn_weights = attn_weights_float.view( + bsz, self.num_heads, tgt_len, src_len + ).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + + return attn, attn_weights + + @staticmethod + def _append_prev_key_padding_mask( + key_padding_mask: Optional[Tensor], + prev_key_padding_mask: Optional[Tensor], + batch_size: int, + src_len: int, + static_kv: bool, + ) -> Optional[Tensor]: + # saved key padding masks have shape (bsz, seq_len) + if prev_key_padding_mask is not None and static_kv: + new_key_padding_mask = prev_key_padding_mask + elif prev_key_padding_mask is not None and key_padding_mask is not None: + new_key_padding_mask = torch.cat( + [prev_key_padding_mask.float(), key_padding_mask.float()], dim=1 + ) + # During incremental decoding, as the padding token enters and + # leaves the frame, there will be a time when prev or current + # is None + elif prev_key_padding_mask is not None: + if src_len > prev_key_padding_mask.size(1): + filler = torch.zeros( + (batch_size, src_len - prev_key_padding_mask.size(1)), + device=prev_key_padding_mask.device, + ) + new_key_padding_mask = torch.cat( + [prev_key_padding_mask.float(), filler.float()], dim=1 + ) + else: + new_key_padding_mask = prev_key_padding_mask.float() + elif key_padding_mask is not None: + if src_len > key_padding_mask.size(1): + filler = torch.zeros( + (batch_size, src_len - key_padding_mask.size(1)), + device=key_padding_mask.device, + ) + new_key_padding_mask = torch.cat( + [filler.float(), key_padding_mask.float()], dim=1 + ) + else: + new_key_padding_mask = key_padding_mask.float() + else: + new_key_padding_mask = prev_key_padding_mask + return new_key_padding_mask + + @torch.jit.export + def reorder_incremental_state( + self, + incremental_state: Dict[str, Dict[str, Optional[Tensor]]], + new_order: Tensor, + ): + """Reorder buffered internal state (for incremental generation).""" + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + for k in input_buffer.keys(): + input_buffer_k = input_buffer[k] + if input_buffer_k is not None: + if self.encoder_decoder_attention and input_buffer_k.size( + 0 + ) == new_order.size(0): + break + input_buffer[k] = input_buffer_k.index_select(0, new_order) + incremental_state = self._set_input_buffer(incremental_state, input_buffer) + return incremental_state + + def _get_input_buffer( + self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] + ) -> Dict[str, Optional[Tensor]]: + result = self.get_incremental_state(incremental_state, "attn_state") + if result is not None: + return result + else: + empty_result: Dict[str, Optional[Tensor]] = {} + return empty_result + + def _set_input_buffer( + self, + incremental_state: Dict[str, Dict[str, Optional[Tensor]]], + buffer: Dict[str, Optional[Tensor]], + ): + return self.set_incremental_state(incremental_state, "attn_state", buffer) + + def apply_sparse_mask(self, attn_weights, tgt_len: int, src_len: int, bsz: int): + return attn_weights + + def upgrade_state_dict_named(self, state_dict, name): + prefix = name + "." 
if name != "" else "" + items_to_add = {} + keys_to_remove = [] + for k in state_dict.keys(): + if k.endswith(prefix + "in_proj_weight"): + # in_proj_weight used to be q + k + v with same dimensions + dim = int(state_dict[k].shape[0] / 3) + items_to_add[prefix + "q_proj.weight"] = state_dict[k][:dim] + items_to_add[prefix + "k_proj.weight"] = state_dict[k][dim : 2 * dim] + items_to_add[prefix + "v_proj.weight"] = state_dict[k][2 * dim :] + + keys_to_remove.append(k) + + k_bias = prefix + "in_proj_bias" + if k_bias in state_dict.keys(): + dim = int(state_dict[k].shape[0] / 3) + items_to_add[prefix + "q_proj.bias"] = state_dict[k_bias][:dim] + items_to_add[prefix + "k_proj.bias"] = state_dict[k_bias][ + dim : 2 * dim + ] + items_to_add[prefix + "v_proj.bias"] = state_dict[k_bias][2 * dim :] + + keys_to_remove.append(prefix + "in_proj_bias") + + for k in keys_to_remove: + del state_dict[k] + + for key, value in items_to_add.items(): + state_dict[key] = value + + +class ConvFeatureExtractionModel(nn.Module): + def __init__( + self, + conv_layers: List[Tuple[int, int, int]], + dropout: float = 0.0, + mode: str = "default", + conv_bias: bool = False, + ): + super().__init__() + + assert mode in {"default", "layer_norm"} + + def block( + n_in, + n_out, + k, + stride, + is_layer_norm=False, + is_group_norm=False, + conv_bias=False, + ): + def make_conv(): + conv = nn.Conv1d(n_in, n_out, k, stride=stride, bias=conv_bias) + nn.init.kaiming_normal_(conv.weight) + return conv + + assert ( + is_layer_norm and is_group_norm + ) == False, "layer norm and group norm are exclusive" + + if is_layer_norm: + return nn.Sequential( + make_conv(), + nn.Dropout(p=dropout), + nn.Sequential( + TransposeLast(), + Fp32LayerNorm(dim, elementwise_affine=True), + TransposeLast(), + ), + nn.GELU(), + ) + elif is_group_norm: + return nn.Sequential( + make_conv(), + nn.Dropout(p=dropout), + Fp32GroupNorm(dim, dim, affine=True), + nn.GELU(), + ) + else: + return nn.Sequential(make_conv(), nn.Dropout(p=dropout), nn.GELU()) + + in_d = 1 + self.conv_layers = nn.ModuleList() + for i, cl in enumerate(conv_layers): + assert len(cl) == 3, "invalid conv definition: " + str(cl) + (dim, k, stride) = cl + + self.conv_layers.append( + block( + in_d, + dim, + k, + stride, + is_layer_norm=mode == "layer_norm", + is_group_norm=mode == "default" and i == 0, + conv_bias=conv_bias, + ) + ) + in_d = dim + + def forward(self, x): + + # BxT -> BxCxT + x = x.unsqueeze(1) + + for conv in self.conv_layers: + x = conv(x) + + return x + + +class TransposeLast(nn.Module): + def __init__(self, deconstruct_idx=None): + super().__init__() + self.deconstruct_idx = deconstruct_idx + + def forward(self, x): + if self.deconstruct_idx is not None: + x = x[self.deconstruct_idx] + return x.transpose(-2, -1) + + +class Fp32GroupNorm(nn.GroupNorm): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def forward(self, input): + output = F.group_norm( + input.float(), + self.num_groups, + self.weight.float() if self.weight is not None else None, + self.bias.float() if self.bias is not None else None, + self.eps, + ) + return output.type_as(input) + + +class GradMultiply(torch.autograd.Function): + @staticmethod + def forward(ctx, x, scale): + ctx.scale = scale + res = x.new(x) + return res + + @staticmethod + def backward(ctx, grad): + return grad * ctx.scale, None + + +class Rotate3D(nn.Module): + """ + (T, B, D) --> (B, D, T) --> (D, T, B) --> (T, B, D) + """ + def __init__(self): + super().__init__() + + def forward(self, x): + return 
x.permute(1, 2, 0) + + +class SamePad(nn.Module): + def __init__(self, kernel_size, causal=False): + super().__init__() + if causal: + self.remove = kernel_size - 1 + else: + self.remove = 1 if kernel_size % 2 == 0 else 0 + + def forward(self, x): + if self.remove > 0: + x = x[:, :, : -self.remove] + return x diff --git a/SpeechT5/SpeechLM/speechlm/__init__.py b/SpeechT5/SpeechLM/speechlm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..97327d269e93a13cd135f6c1a187fd820a8decb8 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/__init__.py @@ -0,0 +1 @@ +from . import data, tasks, criterions, models diff --git a/SpeechT5/SpeechLM/speechlm/config/decode/infer_fsqlm.yaml b/SpeechT5/SpeechLM/speechlm/config/decode/infer_fsqlm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..005524facca0996a9a299cbe9bdd21570bd65c2e --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/config/decode/infer_fsqlm.yaml @@ -0,0 +1,44 @@ +# @package _group_ + +defaults: + - model: null + +hydra: + run: + dir: ${common_eval.results_path}/beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + sweep: + dir: ${common_eval.results_path} + subdir: beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ["ltr"] + store_labels: true + single_target: true + fine_tuning: true + normalize: ??? # must be consistent with pre-training + add_decoder_target: false + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +decoding: + type: fairseqlm + lexicon: ??? + lmpath: ??? + beamthreshold: 25 + beam: 500 + lmweight: 2 + wordscore: -1 + silweight: 0 + unique_wer_file: true +common_eval: + results_path: ??? + path: ??? + post_process: letter +dataset: + max_tokens: 1100000 + gen_subset: ??? diff --git a/SpeechT5/SpeechLM/speechlm/config/decode/infer_kenlm.yaml b/SpeechT5/SpeechLM/speechlm/config/decode/infer_kenlm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98a8b7567d77fbee0eb38c3554d332a127f830f4 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/config/decode/infer_kenlm.yaml @@ -0,0 +1,44 @@ +# @package _group_ + +defaults: + - model: null + +hydra: + run: + dir: ${common_eval.results_path}/beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + sweep: + dir: ${common_eval.results_path} + subdir: beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight} + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ["ltr"] + store_labels: true + single_target: true + fine_tuning: true + normalize: ??? # must be consistent with pre-training + add_decoder_target: false + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +decoding: + type: kenlm + lexicon: ??? + lmpath: ??? + beamthreshold: 100 + beam: 500 + lmweight: 2 + wordscore: -1 + silweight: 0 + unique_wer_file: true +common_eval: + results_path: ??? + path: ??? + post_process: letter +dataset: + max_tokens: 1100000 + gen_subset: ??? 
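The `infer_fsqlm.yaml` and `infer_kenlm.yaml` configs above parameterize lexicon beam-search decoding with an external language model (flashlight-style shallow fusion, as typically wired up in fairseq), while the `infer_viterbi.yaml` config that follows decodes greedily with no LM at all. The sketch below is only an orientation for what `lmweight`, `wordscore`, and `silweight` mean: each hypothesis is scored by its acoustic log-probability plus weighted LM and length terms. It is not the decoder's actual implementation, and the helper inputs (`am_logprob`, `lm_logprob`, `n_words`, `n_sil`) are hypothetical placeholders.

```python
# Minimal sketch (not the fairseq/flashlight decoder): how the decode-config
# weights are usually combined into one hypothesis score during lexicon
# beam search with an external LM ("shallow fusion").
from dataclasses import dataclass


@dataclass
class DecodeWeights:
    lmweight: float = 2.0    # decoding.lmweight in infer_kenlm.yaml
    wordscore: float = -1.0  # decoding.wordscore: per-word insertion bonus/penalty
    silweight: float = 0.0   # decoding.silweight: per-silence-token penalty


def fused_score(am_logprob: float, lm_logprob: float,
                n_words: int, n_sil: int, w: DecodeWeights) -> float:
    """Hypothetical scoring: acoustic (CTC) log-prob plus weighted LM and length terms."""
    return (am_logprob
            + w.lmweight * lm_logprob
            + w.wordscore * n_words
            + w.silweight * n_sil)


# Example: two competing hypotheses under the config defaults.
w = DecodeWeights()
h1 = fused_score(am_logprob=-12.3, lm_logprob=-4.1, n_words=3, n_sil=0, w=w)
h2 = fused_score(am_logprob=-11.8, lm_logprob=-7.9, n_words=3, n_sil=1, w=w)
print(h1 > h2)  # True: the LM term overturns h2's slightly better acoustic score
```

`beam` and `beamthreshold` only control search width, whereas the fusion weights change the ranking itself; that is why the hydra `run.dir`/`sweep.subdir` patterns in these configs bake `beam`, `beamthreshold`, `lmweight`, `wordscore`, and `silweight` into the output path, so each swept setting writes its WER results into its own directory.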
diff --git a/SpeechT5/SpeechLM/speechlm/config/decode/infer_viterbi.yaml b/SpeechT5/SpeechLM/speechlm/config/decode/infer_viterbi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..969fc23df233d1785fb28bd340c15614877b0272 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/config/decode/infer_viterbi.yaml @@ -0,0 +1,37 @@ +# @package _group_ + +defaults: + - model: null + +hydra: + run: + dir: ${common_eval.results_path}/viterbi + sweep: + dir: ${common_eval.results_path} + subdir: viterbi + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ["ltr"] + store_labels: true + single_target: true + fine_tuning: true + normalize: ??? # must be consistent with pre-training + add_decoder_target: false + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +decoding: + type: viterbi + unique_wer_file: true +common_eval: + results_path: ??? + path: ??? + post_process: letter +dataset: + batch_size: 1 + gen_subset: ??? diff --git a/SpeechT5/SpeechLM/speechlm/config/finetune/speechlm_base_100h.yaml b/SpeechT5/SpeechLM/speechlm/config/finetune/speechlm_base_100h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c48b856da69f75205d8c0e47752cef173194558c --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/config/finetune/speechlm_base_100h.yaml @@ -0,0 +1,99 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 1 + keep_last_epochs: 1 + keep_best_checkpoints: -1 + best_checkpoint_metric: wer + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: 1 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: joint_sc2t_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: false # must be consistent with pre-training + labels: ["ltr"] + store_labels: true + single_target: true + add_decoder_target: false + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 1600000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_100 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 30000 + lr: [0.00001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speechlm_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/SpeechLM/speechlm/config/finetune/speechlm_large_960h.yaml b/SpeechT5/SpeechLM/speechlm/config/finetune/speechlm_large_960h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc86b6f284dc126dbc614639130e57898d05d2e1 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/config/finetune/speechlm_large_960h.yaml @@ -0,0 +1,98 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + +checkpoint: + save_interval: 1 + keep_last_epochs: 5 + keep_best_checkpoints: 5 + best_checkpoint_metric: wer + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: 32 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: joint_sc2t_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: true # must be consistent with pre-training + labels: ["ltr"] + store_labels: true + single_target: true + add_decoder_target: false + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 900000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_960 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc + zero_infinity: true + +optimization: + max_update: 200000 + lr: [0.00001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speechlm_ctc + w2v_path: ??? + apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.0 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/SpeechLM/speechlm/config/pretrain/speechlm_base_librispeech.yaml b/SpeechT5/SpeechLM/speechlm/config/pretrain/speechlm_base_librispeech.yaml new file mode 100644 index 0000000000000000000000000000000000000000..454f5e2a8b6b67f8b7bf906e7e38f395b9ca9e96 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/config/pretrain/speechlm_base_librispeech.yaml @@ -0,0 +1,139 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 4 + keep_last_epochs: 4 + save_interval_updates: 50000 + keep_interval_updates: -1 + keep_interval_updates_pattern: 50000 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: legacy_ddp + distributed_backend: 'nccl' + distributed_port: -1 + distributed_world_size: 32 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + store_labels: true + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: false # must be consistent with extractor + add_decoder_target: false + text_cfg: + seed: ${common.seed} + text_data: ??? 
+ data_config: config.yaml + sample_break_mode: eos + tokens_per_sample: 1024 + shorten_method: "random_crop" + text_maxtokens_ratio: 1.0 + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 1 + +criterion: + _name: speechlm_criterion + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + text_ctc_weight: 0.1 + text_mum_weight: 0.0 + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: speechlm + label_rate: ??? + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + activation_fn: "gelu" + encoder_layers: 6 + encoder_attention_heads: 8 + encoder_layerdrop: 0.1 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + add_unit_encoder: true + add_text_ctc: true + mask_u2t: true + compute_mum: false + mix_with_unit: true + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + max_source_positions: 3000 + no_scale_embedding: true + layernorm_embedding: true + no_token_positional_embeddings: false + encoder: + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 8 + normalize_before: false + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/SpeechLM/speechlm/config/pretrain/speechlm_large_librilight.yaml b/SpeechT5/SpeechLM/speechlm/config/pretrain/speechlm_large_librilight.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74d593e3cf099f2116f541d87a644ea48f5642d6 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/config/pretrain/speechlm_large_librilight.yaml @@ -0,0 +1,144 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1234 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 1 + keep_last_epochs: 4 + save_interval_updates: 10000 + keep_interval_updates: 40 + keep_interval_updates_pattern: 10000 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: legacy_ddp + distributed_backend: 'nccl' + distributed_port: -1 + distributed_world_size: 32 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + store_labels: true + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: true # must be consistent with extractor + add_decoder_target: false + text_cfg: + seed: ${common.seed} + text_data: ??? 
+ data_config: config.yaml + sample_break_mode: eos + tokens_per_sample: 1024 + shorten_method: "random_crop" + text_maxtokens_ratio: 1.0 + +dataset: + num_workers: 1 + max_tokens: 900000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 2 + +criterion: + _name: speechlm_criterion + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + text_ctc_weight: 0.1 + text_mum_weight: 0.0 + +optimization: + max_update: 400000 + lr: [0.001] + clip_norm: 1.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: speechlm + label_rate: ??? + activation_fn: "gelu" + encoder_layers: 12 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + final_dim: 256 + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: layer_norm + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + encoder_layerdrop: 0.0 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + layer_norm_first: true + feature_grad_mult: 1.0 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + add_unit_encoder: true + add_text_ctc: true + mask_u2t: true + compute_mum: false + mix_with_unit: true + scaling_for_att: 32 + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + max_source_positions: 3000 + no_scale_embedding: true + layernorm_embedding: true + no_token_positional_embeddings: false + encoder: + embed_dim: 1024 + ffn_embed_dim: 4096 + layers: 12 + attention_heads: 16 + normalize_before: ${model.layer_norm_first} + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} + diff --git a/SpeechT5/SpeechLM/speechlm/config/pretrain/speechlmp_base_cfg.pt b/SpeechT5/SpeechLM/speechlm/config/pretrain/speechlmp_base_cfg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b184b949fdf1fee0c35eb97892f7d24b38924e33 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/config/pretrain/speechlmp_base_cfg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99560910d3db14a6e1a2357d4812e7bb0ff230aae1a5885caa383f3d2881e442 +size 31407 diff --git a/SpeechT5/SpeechLM/speechlm/criterions/__init__.py b/SpeechT5/SpeechLM/speechlm/criterions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..285826b653331f6c1a326de4ed234605e35876df --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/criterions/__init__.py @@ -0,0 +1,9 @@ +import importlib +import os + +for file in os.listdir(os.path.dirname(__file__)): + if file.endswith(".py") and not file.startswith("_"): + criterion_name = file[: file.find(".py")] + importlib.import_module( + "speechlm.criterions." 
+ criterion_name + ) diff --git a/SpeechT5/SpeechLM/speechlm/criterions/fasttext2unit_loss.py b/SpeechT5/SpeechLM/speechlm/criterions/fasttext2unit_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..2cceed3f102d5ab5ff355db9960c74dbe31a8d93 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/criterions/fasttext2unit_loss.py @@ -0,0 +1,181 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +from typing import List, Dict, Any +from dataclasses import dataclass, field + +import torch +import torch.nn.functional as F + +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.dataclass import FairseqDataclass +from fairseq.data.data_utils import lengths_to_mask +from fairseq.models.fairseq_model import FairseqEncoderModel + +def label_smoothed_nll_loss(lprobs, target, epsilon, ignore_index=None, reduce=True): + if target.dim() == lprobs.dim() - 1: + target = target.unsqueeze(-1) + nll_loss = -lprobs.gather(dim=-1, index=target) + smooth_loss = -lprobs.sum(dim=-1, keepdim=True) + if ignore_index is not None: + pad_mask = target.eq(ignore_index) + nll_loss.masked_fill_(pad_mask, 0.0) + smooth_loss.masked_fill_(pad_mask, 0.0) + else: + nll_loss = nll_loss.squeeze(-1) + smooth_loss = smooth_loss.squeeze(-1) + if reduce: + ntokens = (~pad_mask).sum() + nll_loss = nll_loss.sum() / ntokens + smooth_loss = smooth_loss.sum() / ntokens + eps_i = epsilon / (lprobs.size(-1) - 1) + loss = (1.0 - epsilon - eps_i) * nll_loss + eps_i * smooth_loss + return loss, nll_loss + +@dataclass +class FastText2UnitCriterionConfig(FairseqDataclass): + label_smoothing: float = field( + default=0.0, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + dur_loss_weight: float = field( + default=1.0, + metadata={"help": "scale of duration loss"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + +@register_criterion("fasttext2unit_criterion", dataclass=FastText2UnitCriterionConfig) +class FastText2UnitLoss(FairseqCriterion): + def __init__(self, + task, + label_smoothing=0, + dur_loss_weight=1.0, + report_accuracy=False, + ): + super().__init__(task) + self.eps = label_smoothing + self.dur_loss_weight = dur_loss_weight + self.pad_idx = task.tgt_dict.pad() + self.report_accuracy = report_accuracy + + def forward(self, model: FairseqEncoderModel, sample, reduction="mean"): + src_tokens = sample["net_input"]["src_tokens"] + src_lens = sample["net_input"]["src_lengths"] + tgt_lens = sample["target_lengths"] + + _feat_out, _feat_out_post, out_lens, log_dur_out, pitch_out, energy_out = model( + src_tokens=src_tokens, + src_lengths=src_lens, + prev_output_tokens=sample["net_input"]["prev_output_tokens"], + incremental_state=None, + target_lengths=tgt_lens, + speaker=sample["speaker"], + durations=sample["durations"], + pitches=sample["pitches"], + energies=sample["energies"], + ) + + src_mask = lengths_to_mask(sample["net_input"]["src_lengths"]) + tgt_mask = 
lengths_to_mask(sample["target_lengths"]) + + lprobs = model.get_normalized_probs((_feat_out,), log_probs=True) + target = sample["target"].long() + ce_loss, nll_loss = label_smoothed_nll_loss(lprobs, target, self.eps, self.padding_idx, reduce=True) + + pitches, energies = sample["pitches"], sample["energies"] + if pitches is not None: + pitch_out, pitches = pitch_out[src_mask], pitches[src_mask] + pitch_loss = F.mse_loss(pitch_out, pitches, reduction=reduction) + else: + pitch_loss = 0 + if energies is not None: + energy_out, energies = energy_out[src_mask], energies[src_mask] + energy_loss = F.mse_loss(energy_out, energies, reduction=reduction) + else: + energy_loss = 0 + + log_dur_out = log_dur_out[src_mask] + dur = sample["durations"].float() + dur = dur.half() if log_dur_out.type().endswith(".HalfTensor") else dur + log_dur = torch.log(dur + 1)[src_mask] + dur_loss = F.mse_loss(log_dur_out, log_dur, reduction=reduction) + dur_loss = self.dur_loss_weight * dur_loss + + loss = ce_loss + dur_loss + pitch_loss + energy_loss + + sample_size = sample["nsentences"] + logging_output = { + "loss": utils.item(loss.data), + "ntokens": sample["ntokens"], + "nsentences": sample["nsentences"], + "sample_size": sample_size, + "ce_loss": utils.item(ce_loss.data), + "dur_loss": utils.item(dur_loss.data), + "pitch_loss": utils.item(pitch_loss), + "energy_loss": utils.item(energy_loss), + } + if self.report_accuracy: + n_correct = lprobs.argmax(-1).masked_select(tgt_mask).eq(target.masked_select(tgt_mask)).sum() + logging_output["n_correct"] = utils.item(n_correct.data) + logging_output["total"] = tgt_mask.sum() + return loss, 1, logging_output + + @classmethod + def reduce_metrics(cls, logging_outputs: List[Dict[str, Any]]) -> None: + ns = [log.get("sample_size", 0) for log in logging_outputs] + ntot = sum(ns) + ws = [n / (ntot + 1e-8) for n in ns] + for key in [ + "loss", + "ce_loss", + "dur_loss", + "pitch_loss", + "energy_loss", + ]: + vals = [log.get(key, 0) for log in logging_outputs] + val = sum(val * w for val, w in zip(vals, ws)) + metrics.log_scalar(key, val, ntot, round=3) + metrics.log_scalar("sample_size", ntot, len(logging_outputs)) + + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("n_correct", n_correct) + metrics.log_derived( + "accuracy", + lambda meters: round( + meters["n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + ) + + # inference metrics + if "targ_frames" not in logging_outputs[0]: + return + n = sum(log.get("targ_frames", 0) for log in logging_outputs) + for key, new_key in [ + ("mcd_loss", "mcd_loss"), + ("pred_frames", "pred_ratio"), + ("nins", "ins_rate"), + ("ndel", "del_rate"), + ]: + val = sum(log.get(key, 0) for log in logging_outputs) + metrics.log_scalar(new_key, val / n, n, round=3) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + return False diff --git a/SpeechT5/SpeechLM/speechlm/criterions/speechlm_criterion.py b/SpeechT5/SpeechLM/speechlm/criterions/speechlm_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..ca13298d7c30b31c2d083137090679e36a7ffd39 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/criterions/speechlm_criterion.py @@ -0,0 +1,352 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with 
Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import math +import re +from dataclasses import dataclass, field +from typing import List, Optional + +import numpy as np +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.dataclass import FairseqDataclass + +logger = logging.getLogger(__name__) + +@dataclass +class HSTCriterionConfig(FairseqDataclass): + pred_masked_weight: float = field( + default=1.0, + metadata={"help": "weight for predictive loss for masked frames"}, + ) + pred_nomask_weight: float = field( + default=0.0, + metadata={"help": "weight for predictive loss for unmasked frames"}, + ) + loss_weights: Optional[List[float]] = field( + default=None, + metadata={"help": "weights for additional loss terms (not first one)"}, + ) + log_keys: List[str] = field( + default_factory=lambda: [], + metadata={"help": "output keys to log"}, + ) + text_ctc_weight: float = field( + default=0.1, + metadata={"help": "weights for text CTC Loss, loss will be (hubert_loss + dec_weight * CE_Loss + text_weight * (CE_Loss + CTC_loss))"}, + ) + text_mum_weight: float = field( + default=0.0, + metadata={"help": "masked unit modeling weight from the text end"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + no_ctc_blank: bool = field( + default=False, + metadata={"help": "mask out the blank of ctc, only when dec_loss_type=ctc"}, + ) + +@register_criterion("speechlm_criterion", dataclass=HSTCriterionConfig) +class SpeechLMCriterion(FairseqCriterion): + def __init__( + self, + task, + pred_masked_weight, + pred_nomask_weight, + loss_weights=None, + log_keys=None, + text_ctc_weight=0.1, + text_mum_weight=0, + report_accuracy=False, + ignore_prefix_size=0, + no_ctc_blank=False, + ): + super().__init__(task) + self.pred_masked_weight = pred_masked_weight + self.pred_nomask_weight = pred_nomask_weight + self.loss_weights = loss_weights + self.log_keys = [] if log_keys is None else log_keys + self.text_ctc_weight = text_ctc_weight + self.text_mum_weight = text_mum_weight + self.report_accuracy = report_accuracy + self.ignore_prefix_size = ignore_prefix_size + self.no_ctc_blank = no_ctc_blank + self.padding_idx = task.dictionaries[0].pad() + self.eos_idx = task.dictionaries[0].eos() + self.blank_idx = task.dictionaries[0].bos() + + def compute_hubert_loss(self, model, net_output, reduction, suffix=''): + loss = 0 + sample_size = [] + logging_output = {} + loss_m_list = [] + logp_m_list = model.get_logits(net_output, True) + targ_m_list = model.get_targets(net_output, True) + assert self.pred_masked_weight == 0 or len(logp_m_list) > 0 + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"loss_m_{i}{suffix}"] = loss_m.detach().item() + if 
self.pred_masked_weight > 0: + loss += self.pred_masked_weight * sum(loss_m_list) + sample_size.append(targ_m_list[0].numel()) + + loss_u_list = [] + logp_u_list = model.get_logits(net_output, False) + targ_u_list = model.get_targets(net_output, False) + assert self.pred_nomask_weight == 0 or len(logp_u_list) > 0 + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"loss_u_{i}{suffix}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss += self.pred_nomask_weight * sum(loss_u_list) + sample_size.append(targ_u_list[0].numel()) + + sample_size = np.mean(sample_size) + + def compute_correct(logits, targets): + if logits.numel() == 0: + return 0, 0 + else: + assert logits.dim() > 1, logits.shape + max = logits.argmax(-1) == targets + min = logits.argmin(-1) == targets + both = max & min + corr = max.long().sum().item() - both.long().sum().item() + count = max.numel() + return corr, count + + with torch.no_grad(): + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + corr_m, count_m = compute_correct(logp_m, targ_m) + logging_output[f"correct_m_{i}{suffix}"] = corr_m + logging_output[f"count_m_{i}{suffix}"] = count_m + + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + corr_u, count_u = compute_correct(logp_u, targ_u) + logging_output[f"correct_u_{i}{suffix}"] = corr_u + logging_output[f"count_u_{i}{suffix}"] = count_u + + return loss, sample_size, logging_output + + + def forward(self, model, sample, reduce=True, log_pred=False): + """Compute the loss for the given sample. + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + reduction = "sum" if reduce else "none" + + if "net_input" in sample: + text_sample = None + else: + text_sample = sample.get("text_paired") + sample = sample.get("speech") + + ### 1. L_UMLM: do hubert forward and loss computation + sample["modality"] = "speech" + net_output = model(target_list=sample["target_list"], **sample["net_input"]) + loss, sample_size, logging_output = self.compute_hubert_loss( + model, + net_output, + reduction, + ) + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len( + self.loss_weights + ), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss += p + logging_output[f"loss_{n}"] = p.item() + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + ### 2. 
do text forward and loss computation + if text_sample is not None: + text_sample["modality"] = "text" + ## 2.1 re-loading "target_list", in default case, target_list = [src_tokens], + ## while in case of using "unit-phone-char" structure, target_list will be [ref_tokens] + text_sample["net_input"]["target_list"] = [ + text_sample.get("ref_tokens", text_sample["net_input"]["src_tokens"].clone()), + ] + text_net_output = model(**text_sample["net_input"]) + + ### 2.2 L_UMLM (text-end, not applied by default) + if self.text_mum_weight > 0: + loss_u2t, sample_size_u2t, logging_output_u2t = self.compute_hubert_loss( + model, + text_net_output, + reduction, + suffix="_u2t", + ) + loss += self.text_mum_weight * loss_u2t * sample_size / sample_size_u2t + logging_output.update(logging_output_u2t) + + ### 2.3 L_UCTC + text_sample_size = text_sample["ntokens"] + if self.text_ctc_weight > 0: + text_ctc_loss = self.compute_ctc_loss(model, text_net_output, text_sample["target"], reduction=reduction) + loss += self.text_ctc_weight * text_ctc_loss * sample_size / text_sample_size + logging_output["text_ctc_loss"] = utils.item(text_ctc_loss) + logging_output["text_sample_size"] = text_sample_size + + logging_output = { + "loss": utils.item(loss) if reduce else loss, + "ntokens": sample_size, + "nsentences": sample["id"].numel() + (text_sample["id"].numel() if text_sample is not None else 0), + "sample_size": sample_size, + **logging_output, + } + + return loss, sample_size, logging_output + + def compute_ctc_loss(self, model, net_output, target, reduction): + logits = net_output["encoder_out_ctc"][0] # (T, B, C) from the code-encoder + if self.no_ctc_blank: + ## set prob of to -inf + logits = logits.float() + logits[:, :, self.blank_idx] = -1000000.0 + + lprobs = F.log_softmax(logits.float(), dim=-1) + + encoder_padding_mask = net_output["encoder_padding_mask"][0] + non_padding_mask = ~encoder_padding_mask + input_lengths = non_padding_mask.long().sum(-1) + pad_mask = (target != self.padding_idx) & (target != self.eos_idx) + targets_flat = target.masked_select(pad_mask) + target_lengths = pad_mask.sum(-1) + + with torch.backends.cudnn.flags(enabled=False): + loss = F.ctc_loss( + lprobs, + targets_flat, + input_lengths, + target_lengths, + blank=self.blank_idx, + reduction=reduction, + zero_infinity=True, + ) + return loss + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.padding_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.padding_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + if sample["modality"] == "speech": + target = sample["decoder_target"] + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + else: + target = sample["target"] + + return 
lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training (copied from normal cross entropy).""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg) + ) + else: + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg) + ) + + counts = {} + for lk in logging_outputs[0].keys(): + if lk.startswith("count_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val) + counts[lk] = val + + for lk in logging_outputs[0].keys(): + if lk.startswith("loss_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / sample_size / math.log(2), round=3) + elif lk.startswith("correct_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / counts[re.sub("correct", "count", lk)]) + + if "text_sample_size" in logging_outputs[0]: + text_sample_size = sum(log.get("text_sample_size", 0) for log in logging_outputs) + for lk in logging_outputs[0].keys(): + if lk.startswith("text_") and lk.endswith("_loss"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / text_sample_size / math.log(2), round=3) + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + raise NotImplementedError() + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. 
+ """ + return False diff --git a/SpeechT5/SpeechLM/speechlm/data/concat_dataset.py b/SpeechT5/SpeechLM/speechlm/data/concat_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9c3c22bc806089a0dd92fd2eff44a78528c975ed --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data/concat_dataset.py @@ -0,0 +1,131 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import bisect + +import numpy as np +from torch.utils.data.dataloader import default_collate + +from fairseq.data import FairseqDataset + + +class ConcatDataset(FairseqDataset): + @staticmethod + def cumsum(sequence, sample_ratios): + r, s = [], 0 + for e, ratio in zip(sequence, sample_ratios): + curr_len = int(ratio * len(e)) + r.append(curr_len + s) + s += curr_len + return r + + def __init__(self, datasets, sample_ratios=1): + super(ConcatDataset, self).__init__() + assert len(datasets) > 0, "datasets should not be an empty iterable" + self.datasets = list(datasets) + if isinstance(sample_ratios, int): + sample_ratios = [sample_ratios] * len(self.datasets) + self.sample_ratios = sample_ratios + self.cumulative_sizes = self.cumsum(self.datasets, sample_ratios) + self.real_sizes = [len(d) for d in self.datasets] + + def __len__(self): + return self.cumulative_sizes[-1] + + def __getitem__(self, idx): + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + return self.datasets[dataset_idx][sample_idx] + + def _get_dataset_and_sample_index(self, idx: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + sample_idx = sample_idx % self.real_sizes[dataset_idx] + return dataset_idx, sample_idx + + def collater(self, samples, **extra_args): + # For now only supports datasets with same underlying collater implementations + if hasattr(self.datasets[0], "collater"): + return self.datasets[0].collater(samples, **extra_args) + else: + return default_collate(samples, **extra_args) + + def size(self, idx: int): + """ + Return an example's size as a float or tuple. + """ + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + return self.datasets[dataset_idx].size(sample_idx) + + def num_tokens(self, index: int): + return np.max(self.size(index)) + + def attr(self, attr: str, index: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, index) + return getattr(self.datasets[dataset_idx], attr, None) + + @property + def sizes(self): + _dataset_sizes = [] + for ds, sr in zip(self.datasets, self.sample_ratios): + if isinstance(ds.sizes, np.ndarray): + _dataset_sizes.append(np.tile(ds.sizes, sr)) + else: + # Only support underlying dataset with single size array. + assert isinstance(ds.sizes, list) + _dataset_sizes.append(np.tile(ds.sizes[0], sr)) + return np.concatenate(_dataset_sizes) + + @property + def supports_prefetch(self): + return all(d.supports_prefetch for d in self.datasets) + + def ordered_indices(self): + """ + Returns indices sorted by length. 
So less padding is needed. + """ + if isinstance(self.sizes, np.ndarray) and len(self.sizes.shape) > 1: + # special handling for concatenating lang_pair_datasets + if getattr(self.datasets[0], "shuffle", False): + indices = np.random.permutation(len(self)).astype(np.int64) + else: + indices = np.arange(len(self), dtype=np.int64) + sizes = self.sizes + tgt_sizes = ( + sizes[:, 1] if len(sizes.shape) > 0 and sizes.shape[1] > 1 else None + ) + src_sizes = ( + sizes[:, 0] if len(sizes.shape) > 0 and sizes.shape[1] > 1 else sizes + ) + # sort by target length, then source length + if tgt_sizes is not None: + indices = indices[np.argsort(tgt_sizes[indices], kind="mergesort")] + return indices[np.argsort(src_sizes[indices], kind="mergesort")] + else: + return np.argsort(self.sizes) + + def prefetch(self, indices): + frm = 0 + for to, ds in zip(self.cumulative_sizes, self.datasets): + real_size = len(ds) + if getattr(ds, "supports_prefetch", False): + ds.prefetch([(i - frm) % real_size for i in indices if frm <= i < to]) + frm = to + + @property + def can_reuse_epoch_itr_across_epochs(self): + return all(d.can_reuse_epoch_itr_across_epochs for d in self.datasets) + + def set_epoch(self, epoch): + super().set_epoch(epoch) + for ds in self.datasets: + if hasattr(ds, "set_epoch"): + ds.set_epoch(epoch) diff --git a/SpeechT5/SpeechLM/speechlm/data/hubert_dataset.py b/SpeechT5/SpeechLM/speechlm/data/hubert_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..cef948f8ecc38deb766ef9031578e1b8d8741170 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data/hubert_dataset.py @@ -0,0 +1,599 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import itertools +import logging +import io +import os +import sys +import time +from pathlib import Path +from typing import Any, List, Optional, Union, Tuple + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils, Dictionary +from fairseq.data.fairseq_dataset import FairseqDataset +from fairseq.data.audio.audio_utils import ( + read_from_stored_zip, + is_sf_audio_data, +) + +FEATURE_OR_SF_AUDIO_FILE_EXTENSIONS = {".npy", ".wav", ".flac", ".ogg"} + +logger = logging.getLogger(__name__) + +def parse_path(path: str) -> Tuple[str, List[int]]: + """Parse data path which is either a path to + 1. a .npy/.wav/.flac/.ogg file + 2. 
a stored ZIP file with slicing info: "[zip_path]:[offset]:[length]" + + Args: + path (str): the data path to parse + + Returns: + file_path (str): the file path + slice_ptr (list of int): empty in case 1; + byte offset and length for the slice in case 2 + """ + + if Path(path).suffix in FEATURE_OR_SF_AUDIO_FILE_EXTENSIONS: + _path, slice_ptr = path, [] + else: + _path, *slice_ptr = path.split(":") + if not Path(_path).is_file(): + raise FileNotFoundError(f"File not found: {_path}") + assert len(slice_ptr) in {0, 1, 2}, f"Invalid path: {path}" + slice_ptr = [int(i) for i in slice_ptr] + return _path, slice_ptr + +def load_audio(manifest_path, max_keep, min_keep, retry_times=5): + n_long, n_short = 0, 0 + names, inds, sizes, chunk_names, chunk_indices = [], [], [], [], [] + for i in range(retry_times): + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + assert len(items) == 2, line + sz = int(items[1]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + fname = items[0].split(":") + if len(fname) > 2: + if len(chunk_names) == 0 or fname[0] != chunk_names[-1]: + chunk_names.append(fname[0]) + chunk_indices.append(len(names)) + names.append(items[0]) + inds.append(ind) + sizes.append(sz) + if len(names) == 0: + logger.warn(f"Fail to load manifest for the {i} time") + time.sleep(1) + continue + else: + break + tot = ind + 1 + logger.info( + ( + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, names, inds, tot, sizes, chunk_names, chunk_indices + + +def load_label(label_path, inds, tot, retry_times=5): + for i in range(retry_times): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + if len(labels) == 0: + logger.warn(f"Fail to load label for the {i} time") + time.sleep(1) + continue + else: + break + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds, tot, retry_times=5): + for i in range(retry_times): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + if len(code_lengths) == 0: + logger.warn(f"Fail to load label for the {i} time") + time.sleep(1) + continue + else: + break + assert ( + len(code_lengths) == tot + ), f"number of labels does not match ({len(code_lengths)} != {tot})" + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + +def verify_label_lengths( + audio_sizes, + audio_rate, + label_path, + label_rate, + inds, + tot, + tol=0.1, # tolerance in seconds +): + if label_rate < 0: + logger.info(f"{label_path} is sequence label. skipped") + return + + with open(label_path) as f: + lengths = [len(line.rstrip().split()) for line in f] + assert len(lengths) == tot + lengths = [lengths[i] for i in inds] + num_invalid = 0 + for i, ind in enumerate(inds): + dur_from_audio = audio_sizes[i] / audio_rate + dur_from_label = lengths[i] / label_rate + if abs(dur_from_audio - dur_from_label) > tol: + logger.warning( + ( + f"audio and label duration differ too much " + f"(|{dur_from_audio} - {dur_from_label}| > {tol}) " + f"in line {ind+1} of {label_path}. 
Check if `label_rate` " + f"is correctly set (currently {label_rate}). " + f"num. of samples = {audio_sizes[i]}; " + f"label length = {lengths[i]}" + ) + ) + num_invalid += 1 + if num_invalid > 0: + logger.warning( + f"total {num_invalid} (audio, label) pairs with mismatched lengths" + ) + + +class HubertDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + random_crop: bool = False, + single_target: bool = False, + tgt_dict: Optional[Dictionary] = None, + add_decoder_target: bool = False, + fine_tuning: bool = False, + tgt_lang_idx: int = None, + tokenizer = None, + mbart_style_lang_id: bool = False, + retry_times: int = 5, + reduce_label_for_dec: bool = True, + ): + self.audio_root, self.audio_names, inds, tot, self.wav_sizes, self.chunk_names, self.chunk_indices = load_audio( + manifest_path, max_keep_sample_size, min_keep_sample_size, retry_times + ) + self.sample_rate = sample_rate + self.shuffle = shuffle + self.random_crop = random_crop + self.tgt_dict = tgt_dict + self.add_decoder_target = add_decoder_target + self.fine_tuning = fine_tuning + + self.num_labels = len(label_paths) + self.pad_list = pad_list + self.eos_list = eos_list + self.label_processors = label_processors + self.single_target = single_target + self.epoch = 0 + + self.label_rates = ( + [label_rates for _ in range(len(label_paths))] + if isinstance(label_rates, int) + else label_rates + ) + self.store_labels = store_labels + if store_labels: + self.label_list = [load_label(p, inds, tot, retry_times) for p in label_paths] + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds, tot, retry_times) for p in label_paths + ] + assert label_processors is None or len(label_processors) == self.num_labels + for label_path, label_rate in zip(label_paths, self.label_rates): + verify_label_lengths( + self.wav_sizes, sample_rate, label_path, label_rate, inds, tot + ) + + self.max_sample_size = ( + max_sample_size if max_sample_size is not None else sys.maxsize + ) + self.pad_audio = pad_audio + self.normalize = normalize + self.tgt_lang_idx = tgt_lang_idx + self.tokenizer = tokenizer + self.mbart_style_lang_id = mbart_style_lang_id + self.retry_times = retry_times + self.reduce_label_for_dec = reduce_label_for_dec + logger.info( + f"pad_audio={pad_audio}, random_crop={random_crop}, tgt_lang_idx={self.tgt_lang_idx}, reduce_label_for_dec={reduce_label_for_dec}, " + f"mbart_style_lang_id={mbart_style_lang_id}, normalize={normalize}, max_sample_size={self.max_sample_size}" + ) + + def set_epoch(self, epoch): + self.epoch = epoch + + def batch_by_size(self, indices, max_tokens=None, max_sentences=None, required_batch_size_multiple=1): + self.max_tokens = max_tokens + self.max_sentences = max_sentences + self.required_batch_size_multiple = required_batch_size_multiple + if isinstance(indices[0], np.ndarray): + batch_list = [] + for indice in indices: + batch = super(HubertDataset, self).batch_by_size(indice, max_tokens, max_sentences, required_batch_size_multiple) + batch_list.append(batch) + return batch_list + else: + return 
super(HubertDataset, self).batch_by_size(indices, max_tokens, max_sentences, required_batch_size_multiple) + def shuffle_batches(self, batches, seed): + if isinstance(batches[0], list): + new_batches = [] + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + for batch in batches: + np.random.shuffle(batch) + new_batches.extend(batch) + return new_batches + else: + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + return batches + + def get_audio(self, index): + import soundfile as sf + + wav_path = os.path.join(self.audio_root, self.audio_names[index]) + _path, slice_ptr = parse_path(wav_path) + if len(slice_ptr) == 1: + import kaldiio + feat = kaldiio.load_mat(wav_path) + feat = torch.from_numpy(feat).float() + if self.normalize: + with torch.no_grad(): + feat = F.layer_norm(feat, feat.shape[-1]) + return feat + else: + if len(slice_ptr) == 2: + byte_data = read_from_stored_zip(_path, slice_ptr[0], slice_ptr[1]) + assert is_sf_audio_data(byte_data) + wav_path = io.BytesIO(byte_data) + for i in range(self.retry_times): + if i < self.retry_times - 1: + try: + wav, cur_sample_rate = sf.read(wav_path) + break + except Exception as e: + logger.warn(f"Fail to load wav for the {i} time") + logger.warn(e) + time.sleep(1) + continue + else: + wav, cur_sample_rate = sf.read(wav_path) + + wav = torch.from_numpy(wav).float() + wav = self.postprocess(wav, cur_sample_rate) + return wav + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.tokenizer is not None and self.fine_tuning: + label = self.tokenizer.encode(label) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + def __getitem__(self, index): + wav = self.get_audio(index) + labels = self.get_labels(index) + return {"id": index, "source": wav, "label_list": labels} + + def __len__(self): + return len(self.wav_sizes) + + def crop_to_max_size(self, wav, target_size): + size = len(wav) + diff = size - target_size + if diff <= 0: + return wav, 0 + + start, end = 0, target_size + if self.random_crop: + start = np.random.randint(0, diff + 1) + end = size - diff + start + return wav[start:end], start + + def collater(self, samples): + # target = max(sizes) -> random_crop not used + # target = max_sample_size -> random_crop used for long + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + audios = [s["source"] for s in samples] + audio_sizes = [len(s) for s in audios] + if self.pad_audio: + audio_size = min(max(audio_sizes), self.max_sample_size) + else: + audio_size = min(min(audio_sizes), self.max_sample_size) + feat_dim = audios[0].size(-1) if audios[0].dim() > 1 else 1 + collated_audios, padding_mask, audio_starts = self.collater_audio( + audios, audio_size, feat_dim, + ) + + targets_by_label = [ + [s["label_list"][i] for s in samples] for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, audio_size, audio_starts + ) + + if self.add_decoder_target: + if self.fine_tuning: + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in 
range(targets_list[0].size(0)) + ] + else: + if self.tokenizer is not None: + decoder_label = [ + # Set 48 for translate int to char and avoid \n + torch.cat( + ( + torch.tensor( + self.tokenizer.sp.Encode( + "".join( + [chr(j + 48) for j in ( + targets_list[0][i, :lengths_list[0][i]].unique_consecutive() if self.reduce_label_for_dec else targets_list[0][i, :lengths_list[0][i]] + ).tolist()] + ), out_type=int + ) + ), + torch.tensor([self.tgt_dict.eos()]) + ), dim=0 + ).long() + for i in range(targets_list[0].size(0)) + ] + else: + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]].unique_consecutive() if self.reduce_label_for_dec else targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + + if self.mbart_style_lang_id: + decoder_label = [ + torch.cat((decoder_label[i], torch.tensor([self.tgt_lang_idx])), 0).long() + for i in range(targets_list[0].size(0)) + ] + + dec_ntokens = sum(x.size(0) for x in decoder_label) + decoder_target = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos() if not self.mbart_style_lang_id else self.tgt_lang_idx, + left_pad=False, + move_eos_to_beginning=False, + ) + decoder_target_lengths = torch.tensor( + [x.size(0) for x in decoder_label], dtype=torch.long + ) + prev_output_tokens = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos() if not self.mbart_style_lang_id else self.tgt_lang_idx, + left_pad=False, + move_eos_to_beginning=True, + ) + + if self.tgt_lang_idx is not None and not self.mbart_style_lang_id: + assert (prev_output_tokens[:, 0] != self.tgt_dict.eos()).sum() == 0 + prev_output_tokens[:, 0] = self.tgt_lang_idx + + net_input = { + "source": collated_audios, + "padding_mask": padding_mask, + "prev_output_tokens": prev_output_tokens, + } + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + "decoder_target": decoder_target, + "decoder_target_lengths": decoder_target_lengths, + "dec_ntokens": dec_ntokens, + "lang_idx": self.tgt_lang_idx, + } + else: + net_input = {"source": collated_audios, "padding_mask": padding_mask} + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + return batch + + def collater_audio(self, audios, audio_size, feat_dim=1): + collated_audios = audios[0].new_zeros(len(audios), audio_size, feat_dim) + padding_mask = ( + torch.BoolTensor(collated_audios.shape[0:2]).fill_(False) + # if self.pad_audio else None + ) + audio_starts = [0 for _ in audios] + for i, audio in enumerate(audios): + audio = audio.view(-1, feat_dim) + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + elif diff < 0: + assert self.pad_audio + collated_audios[i] = torch.cat([audio, audio.new_full((-diff, feat_dim), 0.0)]) + padding_mask[i, diff:] = True + else: + collated_audios[i], audio_starts[i] = self.crop_to_max_size( + audio, audio_size + ) + return collated_audios.squeeze(-1), padding_mask, audio_starts + + def collater_frm_label(self, targets, audio_size, audio_starts, label_rate, pad): + assert label_rate > 0 + s2f = label_rate / self.sample_rate + frm_starts = [int(round(s * s2f)) for s in 
audio_starts] + frm_size = int(round(audio_size * s2f)) + if not self.pad_audio: + rem_size = [len(t) - s for t, s in zip(targets, frm_starts)] + frm_size = min(frm_size, *rem_size) + targets = [t[s : s + frm_size] for t, s in zip(targets, frm_starts)] + logger.debug(f"audio_starts={audio_starts}") + logger.debug(f"frame_starts={frm_starts}") + logger.debug(f"frame_size={frm_size}") + + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_label(self, targets_by_label, audio_size, audio_starts): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + if label_rate == -1: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + else: + targets, lengths, ntokens = self.collater_frm_label( + targets, audio_size, audio_starts, label_rate, pad + ) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + if self.pad_audio: + return self.wav_sizes[index] + return min(self.wav_sizes[index], self.max_sample_size) + + @property + def sizes(self): + return np.array(self.wav_sizes) + + def ordered_indices(self): + """Return an ordered list of indices. Batches will be constructed based + on this order.""" + + if self.shuffle: + if len(self.chunk_names) > 0: + logger.info(f"ordered indices for epoch {self.epoch}") + with data_utils.numpy_seed(self.epoch): + self.chunk_order = np.random.permutation(len(self.chunk_names)) + chunk_count = 0 + tmp_sizes = [] + tmp_indices = [] + indice = [] + for i in self.chunk_order: + chunk_count += 1 + start = self.chunk_indices[i] + end = self.chunk_indices[i+1] if i < len(self.chunk_names) - 1 else len(self) + size = list(self.sizes[start:end]) + tmp_indices.extend(list(np.arange(start, end))) + tmp_sizes.extend(size) + if chunk_count % 10 == 0 or i == self.chunk_order[0]: + order = [np.random.permutation(len(tmp_indices))] + order.append( + np.minimum( + np.array(tmp_sizes), + self.max_sample_size, + ) + ) + sort_idx = np.lexsort(order)[::-1] + indice.append(np.array([tmp_indices[k] for k in sort_idx])) + tmp_indices = [] + tmp_sizes =[] + return indice + else: + order = [np.random.permutation(len(self))] + order.append( + np.minimum( + np.array(self.sizes), + self.max_sample_size, + ) + ) + return np.lexsort(order)[::-1] + else: + return np.arange(len(self)) + + def postprocess(self, wav, cur_sample_rate): + if wav.dim() == 2: + wav = wav.mean(-1) + assert wav.dim() == 1, wav.dim() + + if cur_sample_rate != self.sample_rate: + raise Exception(f"sr {cur_sample_rate} != {self.sample_rate}") + + if self.normalize: + with torch.no_grad(): + wav = F.layer_norm(wav, wav.shape) + return wav diff --git a/SpeechT5/SpeechLM/speechlm/data/language_trible_dataset.py b/SpeechT5/SpeechLM/speechlm/data/language_trible_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..587a0450e36460416ec6b0882d5b1eda65ad3f13 --- /dev/null +++ 
b/SpeechT5/SpeechLM/speechlm/data/language_trible_dataset.py @@ -0,0 +1,671 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import numpy as np +import torch +import os +import itertools + +from fairseq.data import FairseqDataset, data_utils +from fairseq.data import ( + AppendTokenDataset, + ConcatDataset, + PrependTokenDataset, + data_utils, + indexed_dataset, +) + +logger = logging.getLogger(__name__) + +def load_langtriple_dataset( + data_path, + split, + src, + src_dict, + ref, + ref_dict, + tgt, + tgt_dict, + combine, + dataset_impl, + upsample_primary, + left_pad_source, + left_pad_target, + max_source_positions, + max_target_positions, + prepend_bos=False, + load_alignments=False, + truncate_source=False, + append_source_id=False, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + prepend_bos_src=None, + lang_format="[{}]", +): + assert not truncate_source + def split_exists(split, src, ref, tgt, lang, data_path): + filename = os.path.join(data_path, "{}.{}-{}-{}.{}".format(split, src, ref, tgt, lang)) + return indexed_dataset.dataset_exists(filename, impl=dataset_impl) + + src_datasets = [] + ref_datasets = [] + tgt_datasets = [] + + for k in itertools.count(): + split_k = split + (str(k) if k > 0 else "") + + # infer langcode + if split_exists(split_k, src, ref, tgt, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}-{}.".format(split_k, src, ref, tgt)) + elif split_exists(split_k, tgt, ref, src, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}-{}.".format(split_k, tgt, ref, src)) + else: + if k > 0: + break + else: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(split, data_path) + ) + + src_dataset = data_utils.load_indexed_dataset( + prefix + src, src_dict, dataset_impl + ) + src_datasets.append(src_dataset) + + ref_dataset = data_utils.load_indexed_dataset( + prefix + ref, ref_dict, dataset_impl + ) + ref_datasets.append(ref_dataset) + + tgt_dataset = data_utils.load_indexed_dataset( + prefix + tgt, tgt_dict, dataset_impl + ) + if tgt_dataset is not None: + tgt_datasets.append(tgt_dataset) + + logger.info( + "{} {} {}-{}-{} {} examples".format( + data_path, split_k, src, ref, tgt, len(src_datasets[-1]) + ) + ) + + if not combine: + break + + assert len(src_datasets) == len(ref_datasets) + assert len(src_datasets) == len(tgt_datasets) or len(tgt_datasets) == 0 + + if len(src_datasets) == 1: + src_dataset = src_datasets[0] + ref_dataset = ref_datasets[0] + tgt_dataset = tgt_datasets[0] if len(tgt_datasets) > 0 else None + else: + sample_ratios = [1] * len(src_datasets) + sample_ratios[0] = upsample_primary + src_dataset = ConcatDataset(src_datasets, sample_ratios) + ref_dataset = ConcatDataset(ref_datasets, sample_ratios) + if len(tgt_datasets) > 0: + tgt_dataset = ConcatDataset(tgt_datasets, sample_ratios) + else: + tgt_dataset = None + + if prepend_bos: + assert hasattr(src_dict, "bos_index") and hasattr(ref_dict, "bos_index") and hasattr(tgt_dict, "bos_index") + src_dataset = PrependTokenDataset(src_dataset, 
src_dict.bos()) + ref_dataset = PrependTokenDataset(ref_dataset, ref_dict.bos()) + if tgt_dataset is not None: + tgt_dataset = PrependTokenDataset(tgt_dataset, tgt_dict.bos()) + elif prepend_bos_src is not None: + logger.info(f"prepending src bos: {prepend_bos_src}") + src_dataset = PrependTokenDataset(src_dataset, prepend_bos_src) + ref_dataset = PrependTokenDataset(ref_dataset, prepend_bos_src) + + eos = None + if append_source_id: + src_dataset = AppendTokenDataset( + src_dataset, src_dict.index(lang_format.format(src)) + ) + ref_dataset = AppendTokenDataset( + ref_dataset, ref_dict.index(lang_format.format(ref)) + ) + if tgt_dataset is not None: + tgt_dataset = AppendTokenDataset( + tgt_dataset, tgt_dict.index(lang_format.format(tgt)) + ) + eos = tgt_dict.index(lang_format.format(tgt)) + + align_dataset = None + if load_alignments: + align_path = os.path.join(data_path, "{}.align.{}-{}".format(split, src, tgt)) + if indexed_dataset.dataset_exists(align_path, impl=dataset_impl): + align_dataset = data_utils.load_indexed_dataset( + align_path, None, dataset_impl + ) + + tgt_dataset_sizes = tgt_dataset.sizes if tgt_dataset is not None else None + return LanguageTripleDataset( + src_dataset, + src_dataset.sizes, + src_dict, + ref_dataset, + ref_dataset.sizes, + ref_dict, + tgt_dataset, + tgt_dataset_sizes, + tgt_dict, + left_pad_source=left_pad_source, + left_pad_target=left_pad_target, + align_dataset=align_dataset, + eos=eos, + num_buckets=num_buckets, + shuffle=shuffle, + pad_to_multiple=pad_to_multiple, + ) + + +def collate( + samples, + pad_idx, + eos_idx, + left_pad_source=True, + left_pad_target=False, + input_feeding=True, + pad_to_length=None, + pad_to_multiple=1, +): + if len(samples) == 0: + return {} + + def merge(key, left_pad, move_eos_to_beginning=False, pad_to_length=None): + return data_utils.collate_tokens( + [s[key] for s in samples], + pad_idx, + None, + left_pad, + move_eos_to_beginning, + pad_to_length=pad_to_length, + pad_to_multiple=pad_to_multiple, + ) + + def check_alignment(alignment, src_len, tgt_len): + if alignment is None or len(alignment) == 0: + return False + if ( + alignment[:, 0].max().item() >= src_len - 1 + or alignment[:, 1].max().item() >= tgt_len - 1 + ): + logger.warning("alignment size mismatch found, skipping alignment!") + return False + return True + + def compute_alignment_weights(alignments): + """ + Given a tensor of shape [:, 2] containing the source-target indices + corresponding to the alignments, a weight vector containing the + inverse frequency of each target index is computed. + For e.g. 
if alignments = [[5, 7], [2, 3], [1, 3], [4, 2]], then + a tensor containing [1., 0.5, 0.5, 1] should be returned (since target + index 3 is repeated twice) + """ + align_tgt = alignments[:, 1] + _, align_tgt_i, align_tgt_c = torch.unique( + align_tgt, return_inverse=True, return_counts=True + ) + align_weights = align_tgt_c[align_tgt_i[np.arange(len(align_tgt))]] + return 1.0 / align_weights.float() + + id = torch.LongTensor([s["id"] for s in samples]) + src_tokens = merge( + "source", + left_pad=left_pad_source, + pad_to_length=pad_to_length["source"] if pad_to_length is not None else None, + ) + ref_tokens = merge( + "reference", + left_pad=left_pad_source, + pad_to_length=pad_to_length["source"] if pad_to_length is not None else None, + ) + # sort by descending source length + src_lengths = torch.LongTensor( + [s["source"].ne(pad_idx).long().sum() for s in samples] + ) + ref_lengths = torch.LongTensor( + [s["reference"].ne(pad_idx).long().sum() for s in samples] + ) + src_lengths, sort_order = src_lengths.sort(descending=True) + id = id.index_select(0, sort_order) + src_tokens = src_tokens.index_select(0, sort_order) + ref_lengths = ref_lengths.index_select(0, sort_order) + ref_tokens = ref_tokens.index_select(0, sort_order) + + prev_output_tokens = None + target = None + if samples[0].get("target", None) is not None: + target = merge( + "target", + left_pad=left_pad_target, + pad_to_length=pad_to_length["target"] + if pad_to_length is not None + else None, + ) + target = target.index_select(0, sort_order) + tgt_lengths = torch.LongTensor( + [s["target"].ne(pad_idx).long().sum() for s in samples] + ).index_select(0, sort_order) + ntokens = tgt_lengths.sum().item() + + if samples[0].get("prev_output_tokens", None) is not None: + prev_output_tokens = merge("prev_output_tokens", left_pad=left_pad_target) + elif input_feeding: + # we create a shifted version of targets for feeding the + # previous output token(s) into the next decoder step + prev_output_tokens = merge( + "target", + left_pad=left_pad_target, + move_eos_to_beginning=True, + pad_to_length=pad_to_length["target"] + if pad_to_length is not None + else None, + ) + else: + ntokens = src_lengths.sum().item() + + batch = { + "id": id, + "nsentences": len(samples), + "ntokens": ntokens, + "net_input": { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + }, + "target": target, + "ref_tokens": ref_tokens, + "ref_lengths": ref_lengths, + } + if prev_output_tokens is not None: + batch["net_input"]["prev_output_tokens"] = prev_output_tokens.index_select( + 0, sort_order + ) + + if samples[0].get("alignment", None) is not None: + bsz, tgt_sz = batch["target"].shape + src_sz = batch["net_input"]["src_tokens"].shape[1] + + offsets = torch.zeros((len(sort_order), 2), dtype=torch.long) + offsets[:, 1] += torch.arange(len(sort_order), dtype=torch.long) * tgt_sz + if left_pad_source: + offsets[:, 0] += src_sz - src_lengths + if left_pad_target: + offsets[:, 1] += tgt_sz - tgt_lengths + + alignments = [ + alignment + offset + for align_idx, offset, src_len, tgt_len in zip( + sort_order, offsets, src_lengths, tgt_lengths + ) + for alignment in [samples[align_idx]["alignment"].view(-1, 2)] + if check_alignment(alignment, src_len, tgt_len) + ] + + if len(alignments) > 0: + alignments = torch.cat(alignments, dim=0) + align_weights = compute_alignment_weights(alignments) + + batch["alignments"] = alignments + batch["align_weights"] = align_weights + + if samples[0].get("constraints", None) is not None: + # Collate the packed constraints 
across the samples, padding to + # the length of the longest sample. + lens = [sample.get("constraints").size(0) for sample in samples] + max_len = max(lens) + constraints = torch.zeros((len(samples), max(lens))).long() + for i, sample in enumerate(samples): + constraints[i, 0 : lens[i]] = samples[i].get("constraints") + batch["constraints"] = constraints.index_select(0, sort_order) + + return batch + + +class LanguageTripleDataset(FairseqDataset): + """ + A pair of torch.utils.data.Datasets. + + Args: + src (torch.utils.data.Dataset): source dataset to wrap + src_sizes (List[int]): source sentence lengths + src_dict (~fairseq.data.Dictionary): source vocabulary + tgt (torch.utils.data.Dataset, optional): target dataset to wrap + tgt_sizes (List[int], optional): target sentence lengths + tgt_dict (~fairseq.data.Dictionary, optional): target vocabulary + left_pad_source (bool, optional): pad source tensors on the left side + (default: True). + left_pad_target (bool, optional): pad target tensors on the left side + (default: False). + shuffle (bool, optional): shuffle dataset elements before batching + (default: True). + input_feeding (bool, optional): create a shifted version of the targets + to be passed into the model for teacher forcing (default: True). + remove_eos_from_source (bool, optional): if set, removes eos from end + of source if it's present (default: False). + append_eos_to_target (bool, optional): if set, appends eos to end of + target if it's absent (default: False). + align_dataset (torch.utils.data.Dataset, optional): dataset + containing alignments. + constraints (Tensor, optional): 2d tensor with a concatenated, zero- + delimited list of constraints for each sentence. + append_bos (bool, optional): if set, appends bos to the beginning of + source/target sentence. + num_buckets (int, optional): if set to a value greater than 0, then + batches will be bucketed into the given number of batch shapes. + src_lang_id (int, optional): source language ID, if set, the collated batch + will contain a field 'src_lang_id' in 'net_input' which indicates the + source language of the samples. + tgt_lang_id (int, optional): target language ID, if set, the collated batch + will contain a field 'tgt_lang_id' which indicates the target language + of the samples. 
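+        ref (torch.utils.data.Dataset): reference dataset to wrap; this third
+            stream is collated into ``ref_tokens``/``ref_lengths`` in the mini-batch
+        ref_sizes (List[int]): reference sentence lengths
+        ref_dict (~fairseq.data.Dictionary): reference vocabulary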
+ """ + + def __init__( + self, + src, + src_sizes, + src_dict, + ref, + ref_sizes, + ref_dict, + tgt=None, + tgt_sizes=None, + tgt_dict=None, + left_pad_source=True, + left_pad_target=False, + shuffle=True, + input_feeding=True, + remove_eos_from_source=False, + append_eos_to_target=False, + align_dataset=None, + constraints=None, + append_bos=False, + eos=None, + num_buckets=0, + src_lang_id=None, + tgt_lang_id=None, + pad_to_multiple=1, + ): + if tgt_dict is not None: + assert src_dict.pad() == tgt_dict.pad() + assert src_dict.eos() == tgt_dict.eos() + assert src_dict.unk() == tgt_dict.unk() + if tgt is not None: + assert len(src) == len( + tgt + ), "Source and target must contain the same number of examples" + assert len(src) == len( + ref + ), "Source and reference must contain the same number of examples" + self.src = src + self.ref = ref + self.tgt = tgt + self.src_sizes = np.array(src_sizes) + self.ref_sizes = np.array(ref_sizes) + self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None + self.sizes = ( + np.vstack((self.src_sizes, self.tgt_sizes)).T + if self.tgt_sizes is not None + else self.src_sizes + ) + self.src_dict = src_dict + self.ref_dict = ref_dict + self.tgt_dict = tgt_dict + self.left_pad_source = left_pad_source + self.left_pad_target = left_pad_target + self.shuffle = shuffle + self.input_feeding = input_feeding + self.remove_eos_from_source = remove_eos_from_source + self.append_eos_to_target = append_eos_to_target + self.align_dataset = align_dataset + if self.align_dataset is not None: + assert ( + self.tgt_sizes is not None + ), "Both source and target needed when alignments are provided" + self.constraints = constraints + self.append_bos = append_bos + self.eos = eos if eos is not None else src_dict.eos() + self.src_lang_id = src_lang_id + self.tgt_lang_id = tgt_lang_id + if num_buckets > 0: + from fairseq.data import BucketPadLengthDataset + + self.src = BucketPadLengthDataset( + self.src, + sizes=self.src_sizes, + num_buckets=num_buckets, + pad_idx=self.src_dict.pad(), + left_pad=self.left_pad_source, + ) + self.src_sizes = self.src.sizes + logger.info("bucketing source lengths: {}".format(list(self.src.buckets))) + self.ref = BucketPadLengthDataset( + self.ref, + sizes=self.ref_sizes, + num_buckets=num_buckets, + pad_idx=self.ref_dict.pad(), + left_pad=self.left_pad_source, + ) + self.ref_sizes = self.ref.sizes + logger.info("bucketing reference lengths: {}".format(list(self.src.buckets))) + if self.tgt is not None: + self.tgt = BucketPadLengthDataset( + self.tgt, + sizes=self.tgt_sizes, + num_buckets=num_buckets, + pad_idx=self.tgt_dict.pad(), + left_pad=self.left_pad_target, + ) + self.tgt_sizes = self.tgt.sizes + logger.info( + "bucketing target lengths: {}".format(list(self.tgt.buckets)) + ) + + # determine bucket sizes using self.num_tokens, which will return + # the padded lengths (thanks to BucketPadLengthDataset) + num_tokens = np.vectorize(self.num_tokens, otypes=[np.compat.long]) + self.bucketed_num_tokens = num_tokens(np.arange(len(self.src))) + self.buckets = [ + (None, num_tokens) for num_tokens in np.unique(self.bucketed_num_tokens) + ] + else: + self.buckets = None + self.pad_to_multiple = pad_to_multiple + + def get_batch_shapes(self): + return self.buckets + + def __getitem__(self, index): + tgt_item = self.tgt[index] if self.tgt is not None else None + src_item = self.src[index] + ref_item = self.ref[index] + # Append EOS to end of tgt sentence if it does not have an EOS and remove + # EOS from end of src sentence if it 
exists. This is useful when we use + # use existing datasets for opposite directions i.e., when we want to + # use tgt_dataset as src_dataset and vice versa + if self.append_eos_to_target: + eos = self.tgt_dict.eos() if self.tgt_dict else self.src_dict.eos() + if self.tgt and self.tgt[index][-1] != eos: + tgt_item = torch.cat([self.tgt[index], torch.LongTensor([eos])]) + + if self.append_bos: + bos = self.tgt_dict.bos() if self.tgt_dict else self.src_dict.bos() + if self.tgt and self.tgt[index][0] != bos: + tgt_item = torch.cat([torch.LongTensor([bos]), self.tgt[index]]) + + bos = self.src_dict.bos() + if self.src[index][0] != bos: + src_item = torch.cat([torch.LongTensor([bos]), self.src[index]]) + if self.ref[index][0] != bos: + ref_item = torch.cat([torch.LongTensor([bos]), self.ref[index]]) + + if self.remove_eos_from_source: + eos = self.src_dict.eos() + if self.src[index][-1] == eos: + src_item = self.src[index][:-1] + if self.ref[index][-1] == eos: + ref_item = self.ref[index][:-1] + + example = { + "id": index, + "source": src_item, + "reference": ref_item, + "target": tgt_item, + } + if self.align_dataset is not None: + example["alignment"] = self.align_dataset[index] + if self.constraints is not None: + example["constraints"] = self.constraints[index] + return example + + def __len__(self): + return len(self.src) + + def collater(self, samples, pad_to_length=None): + """Merge a list of samples to form a mini-batch. + + Args: + samples (List[dict]): samples to collate + pad_to_length (dict, optional): a dictionary of + {'source': source_pad_to_length, 'target': target_pad_to_length} + to indicate the max length to pad to in source and target respectively. + + Returns: + dict: a mini-batch with the following keys: + + - `id` (LongTensor): example IDs in the original input order + - `ntokens` (int): total number of tokens in the batch + - `net_input` (dict): the input to the Model, containing keys: + + - `src_tokens` (LongTensor): a padded 2D Tensor of tokens in + the source sentence of shape `(bsz, src_len)`. Padding will + appear on the left if *left_pad_source* is ``True``. + - `src_lengths` (LongTensor): 1D Tensor of the unpadded + lengths of each source sentence of shape `(bsz)` + - `prev_output_tokens` (LongTensor): a padded 2D Tensor of + tokens in the target sentence, shifted right by one + position for teacher forcing, of shape `(bsz, tgt_len)`. + This key will not be present if *input_feeding* is + ``False``. Padding will appear on the left if + *left_pad_target* is ``True``. + - `src_lang_id` (LongTensor): a long Tensor which contains source + language IDs of each sample in the batch + + - `target` (LongTensor): a padded 2D Tensor of tokens in the + target sentence of shape `(bsz, tgt_len)`. Padding will appear + on the left if *left_pad_target* is ``True``. 
+ - `tgt_lang_id` (LongTensor): a long Tensor which contains target language + IDs of each sample in the batch + """ + res = collate( + samples, + pad_idx=self.src_dict.pad(), + eos_idx=self.eos, + left_pad_source=self.left_pad_source, + left_pad_target=self.left_pad_target, + input_feeding=self.input_feeding, + pad_to_length=pad_to_length, + pad_to_multiple=self.pad_to_multiple, + ) + if self.src_lang_id is not None or self.tgt_lang_id is not None: + src_tokens = res["net_input"]["src_tokens"] + bsz = src_tokens.size(0) + if self.src_lang_id is not None: + res["net_input"]["src_lang_id"] = ( + torch.LongTensor([[self.src_lang_id]]).expand(bsz, 1).to(src_tokens) + ) + if self.tgt_lang_id is not None: + res["tgt_lang_id"] = ( + torch.LongTensor([[self.tgt_lang_id]]).expand(bsz, 1).to(src_tokens) + ) + return res + + def num_tokens(self, index): + """Return the number of tokens in a sample. This value is used to + enforce ``--max-tokens`` during batching.""" + return max( + self.src_sizes[index], + self.tgt_sizes[index] if self.tgt_sizes is not None else 0, + ) + + def num_tokens_vec(self, indices): + """Return the number of tokens for a set of positions defined by indices. + This value is used to enforce ``--max-tokens`` during batching.""" + sizes = self.src_sizes[indices] + if self.tgt_sizes is not None: + sizes = np.maximum(sizes, self.tgt_sizes[indices]) + return sizes + + def size(self, index): + """Return an example's size as a float or tuple. This value is used when + filtering a dataset with ``--max-positions``.""" + return ( + self.src_sizes[index], + self.tgt_sizes[index] if self.tgt_sizes is not None else 0, + ) + + def ordered_indices(self): + """Return an ordered list of indices. Batches will be constructed based + on this order.""" + if self.shuffle: + indices = np.random.permutation(len(self)).astype(np.int64) + else: + indices = np.arange(len(self), dtype=np.int64) + if self.buckets is None: + # sort by target length, then source length + if self.tgt_sizes is not None: + indices = indices[np.argsort(self.tgt_sizes[indices], kind="mergesort")] + return indices[np.argsort(self.src_sizes[indices], kind="mergesort")] + else: + # sort by bucketed_num_tokens, which is: + # max(padded_src_len, padded_tgt_len) + return indices[ + np.argsort(self.bucketed_num_tokens[indices], kind="mergesort") + ] + + @property + def supports_prefetch(self): + return getattr(self.src, "supports_prefetch", False) and ( + getattr(self.tgt, "supports_prefetch", False) or self.tgt is None + ) + + def prefetch(self, indices): + self.src.prefetch(indices) + if self.tgt is not None: + self.tgt.prefetch(indices) + if self.align_dataset is not None: + self.align_dataset.prefetch(indices) + + def filter_indices_by_size(self, indices, max_sizes): + """Filter a list of sample indices. Remove those that are longer + than specified in max_sizes. 
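+        This delegates to ``data_utils.filter_paired_dataset_indices_by_size``,
+        comparing the source and, when available, target lengths against *max_sizes*.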
+ + Args: + indices (np.array): original array of sample indices + max_sizes (int or list[int] or tuple[int]): max sample size, + can be defined separately for src and tgt (then list or tuple) + + Returns: + np.array: filtered sample array + list: list of removed indices + """ + return data_utils.filter_paired_dataset_indices_by_size( + self.src_sizes, + self.tgt_sizes, + indices, + max_sizes, + ) diff --git a/SpeechT5/SpeechLM/speechlm/data/load_langpair_dataset.py b/SpeechT5/SpeechLM/speechlm/data/load_langpair_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..1622a79db99515836e7eb135e0ebd51e6a2d2dc0 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data/load_langpair_dataset.py @@ -0,0 +1,174 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/272c4c5197250997148fb12c0db6306035f166a4/fairseq/tasks/translation.py + 1. Add custom lang_format in function load_langpair_dataset + 2. If truncate_source (default no), use RandomCropDataset instead of TruncateDataset +""" + +import itertools +import logging +import os + +from fairseq.data import ( + AppendTokenDataset, + LanguagePairDataset, + PrependTokenDataset, + StripTokenDataset, + TruncateDataset, + RandomCropDataset, + data_utils, + indexed_dataset, +) + +from speechlm.data.concat_dataset import ConcatDataset + + +EVAL_BLEU_ORDER = 4 + + +logger = logging.getLogger(__name__) + + +def load_langpair_dataset( + data_path, + split, + src, + src_dict, + tgt, + tgt_dict, + combine, + dataset_impl, + upsample_primary, + left_pad_source, + left_pad_target, + max_source_positions, + max_target_positions, + prepend_bos=False, + load_alignments=False, + truncate_source=False, + append_source_id=False, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + prepend_bos_src=None, + lang_format="[{}]", + input_feeding=True, +): + def split_exists(split, src, tgt, lang, data_path): + filename = os.path.join(data_path, "{}.{}-{}.{}".format(split, src, tgt, lang)) + return indexed_dataset.dataset_exists(filename, impl=dataset_impl) + + src_datasets = [] + tgt_datasets = [] + + for k in itertools.count(): + split_k = split + (str(k) if k > 0 else "") + + # infer langcode + if split_exists(split_k, src, tgt, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, src, tgt)) + elif split_exists(split_k, tgt, src, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, tgt, src)) + else: + if k > 0: + break + else: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(split, data_path) + ) + + src_dataset = data_utils.load_indexed_dataset( + prefix + src, src_dict, dataset_impl + ) + if truncate_source: + src_dataset = AppendTokenDataset( + RandomCropDataset( + StripTokenDataset(src_dataset, src_dict.eos()), + max_source_positions - 1, + ), + src_dict.eos(), + ) + src_datasets.append(src_dataset) + + tgt_dataset = data_utils.load_indexed_dataset( + prefix + tgt, tgt_dict, dataset_impl + ) + if tgt_dataset is not 
None: + tgt_datasets.append(tgt_dataset) + + logger.info( + "{} {} {}-{} {} examples".format( + data_path, split_k, src, tgt, len(src_datasets[-1]) + ) + ) + + if not combine: + break + + assert len(src_datasets) == len(tgt_datasets) or len(tgt_datasets) == 0 + + if len(src_datasets) == 1: + src_dataset = src_datasets[0] + tgt_dataset = tgt_datasets[0] if len(tgt_datasets) > 0 else None + else: + sample_ratios = [1] * len(src_datasets) + sample_ratios[0] = upsample_primary + src_dataset = ConcatDataset(src_datasets, sample_ratios) + if len(tgt_datasets) > 0: + tgt_dataset = ConcatDataset(tgt_datasets, sample_ratios) + else: + tgt_dataset = None + + if prepend_bos: + assert hasattr(src_dict, "bos_index") and hasattr(tgt_dict, "bos_index") + src_dataset = PrependTokenDataset(src_dataset, src_dict.bos()) + if tgt_dataset is not None: + tgt_dataset = PrependTokenDataset(tgt_dataset, tgt_dict.bos()) + elif prepend_bos_src is not None: + logger.info(f"prepending src bos: {prepend_bos_src}") + src_dataset = PrependTokenDataset(src_dataset, prepend_bos_src) + + eos = None + if append_source_id: + src_dataset = AppendTokenDataset( + src_dataset, src_dict.index(lang_format.format(src)) + ) + if tgt_dataset is not None: + tgt_dataset = AppendTokenDataset( + tgt_dataset, tgt_dict.index(lang_format.format(tgt)) + ) + eos = tgt_dict.index(lang_format.format(tgt)) + + align_dataset = None + if load_alignments: + align_path = os.path.join(data_path, "{}.align.{}-{}".format(split, src, tgt)) + if indexed_dataset.dataset_exists(align_path, impl=dataset_impl): + align_dataset = data_utils.load_indexed_dataset( + align_path, None, dataset_impl + ) + + tgt_dataset_sizes = tgt_dataset.sizes if tgt_dataset is not None else None + return LanguagePairDataset( + src_dataset, + src_dataset.sizes, + src_dict, + tgt_dataset, + tgt_dataset_sizes, + tgt_dict, + left_pad_source=left_pad_source, + left_pad_target=left_pad_target, + align_dataset=align_dataset, + eos=eos, + num_buckets=num_buckets, + shuffle=shuffle, + pad_to_multiple=pad_to_multiple, + input_feeding=input_feeding, + ) diff --git a/SpeechT5/SpeechLM/speechlm/data/multimodal_corpus_dataset.py b/SpeechT5/SpeechLM/speechlm/data/multimodal_corpus_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..3e954a332d869535cbc335d58447a9b5a854735b --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data/multimodal_corpus_dataset.py @@ -0,0 +1,370 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +from os import replace +import time +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import numpy as np +from fairseq.data import data_utils + +from fairseq.data import FairseqDataset + +logger = logging.getLogger(__name__) + + +class MultiCorpusDataset(FairseqDataset): + """ + see fairseq/fairseq/data/multi_corpus_dataset.__doc__ + + Args: + datasets: a OrderedDict of FairseqDataset instances. 
+ distribution: a List containing the probability of getting an utterance from + corresponding dataset + seed: random seed for sampling the datsets + sort_indices: if true, will sort the ordered indices by size + batch_sample: if true, will ensure each batch is from a single dataset + """ + + def __init__( + self, + datasets: Dict[str, FairseqDataset], + max_positions: Dict, + distribution: List[float], + max_tokens_ratio: List[float], + seed: int = 1234, + sort_indices: bool = False, + check_length: bool = False, + ): + super().__init__() + assert isinstance(datasets, OrderedDict) + assert len(datasets) == len(distribution) + # assert sum(distribution) == 1 + self.datasets = datasets + self.distribution = distribution + self.max_tokens_ratio = max_tokens_ratio + self.seed = seed + self.sort_indices = sort_indices + self.max_positions = max_positions + self.check_length = check_length + + # Avoid repeated conversions to list later + self.dataset_list = list(datasets.values()) + self.total_num_instances = 0 + + # first_dataset = self.dataset_list[0] + + self.num_instances_per_dataset = [] + self.dataset_offsets = [] + for i, dataset in enumerate(self.dataset_list): + assert isinstance(dataset, FairseqDataset) + # assert type(dataset) is type(first_dataset) + self.num_instances_per_dataset.append( + 0 if self.distribution[i] == 0 else len(dataset) + ) + self.dataset_offsets.append(self.total_num_instances) + self.total_num_instances += self.num_instances_per_dataset[i] + + def ordered_indices(self): + start = time.time() + with data_utils.numpy_seed(self.seed, self.epoch): + logger.info(f"sampling new dataset with seed {self.seed} epoch {self.epoch}") + sampled_indices = {} + + # For each dataset i, sample self.distribution[i] * self.total_num_instances + for i, key in enumerate(self.datasets): + tp = time.time() + if self.distribution[i] == 0: + # skip dataset if sampling probability is 0 + continue + + if i < len(self.datasets) - 1: + num_instances = int(self.distribution[i] * self.total_num_instances) + high = self.dataset_offsets[i + 1] + else: + num_instances = int(self.distribution[i] * self.total_num_instances) + high = self.total_num_instances + + logger.info(f"sampling {num_instances} from {key} dataset") + + # First, add k copies of the dataset where k = num_instances // len(dataset). + # This ensures an equal distribution of the data points as much as possible. 
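+                # Illustrative sketch with hypothetical numbers: if num_instances is 250
+                # and the dataset holds 100 items, k = 2 full copies are added via
+                # np.repeat (200 indices) and the remaining 50 indices are drawn from a
+                # random permutation, so each item occurs either 2 or 3 times.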
+ # For the remaining entries randomly sample them + dataset_size = len(self.datasets[key]) + num_copies = num_instances // dataset_size + dataset_indices = np.random.permutation(high - self.dataset_offsets[i])[: num_instances - num_copies * dataset_size] + if num_copies > 0: + dataset_indices = np.concatenate( + ( + np.repeat( + np.arange(high - self.dataset_offsets[i]), num_copies + ), + dataset_indices, + ) + ) + # filter by size, we should ignore it by setting check_length=False + # , as it is very time-consuming on large dadaset + if self.max_positions[key] is not None and self.check_length: + dataset_indices, ignored = self.datasets[key].filter_indices_by_size( + dataset_indices, + self.max_positions[key], + ) + if len(ignored) > 0: + logger.warning( + ( + "{:,} samples have invalid sizes and will be skipped, " + "max_positions={}, first few sample ids={}" + ).format(len(ignored), self.max_positions[key], ignored[:10]) + ) + + if self.sort_indices: + logger.info(" - sampled indices took {}s".format(time.time() - tp)) + tp = time.time() + dataset_indices = np.sort(dataset_indices) + ordered_indices = self.datasets[key].ordered_indices() + if isinstance(ordered_indices[0], np.ndarray): # chunked audio data + dataset_indices = [order_idx + self.dataset_offsets[i] for order_idx in ordered_indices] + assert self.dataset_offsets[i] == 0 + # TODO for chunked audio data, now assume len(dataset_indices) == len(dataset). Don't filter any data. + else: + dataset_indices = ordered_indices[dataset_indices] + self.dataset_offsets[i] + logger.info(" - ordered_indices took {}s".format(time.time() - tp)) + else: + np.random.shuffle(dataset_indices) + + sampled_indices[key] = dataset_indices + + logger.info( + "multi_corpus_dataset ordered_indices took {}s".format( + time.time() - start + ) + ) + return sampled_indices + + def _map_index(self, index: int): + """ + If dataset A has length N and dataset B has length M + then index 1 maps to index 1 of dataset A, and index N + 1 + maps to index 1 of B. + """ + counter = 0 + for num_instances, key in zip(self.num_instances_per_dataset, self.datasets): + if index < counter + num_instances: + return index - counter, key + counter += num_instances + raise ValueError( + "Invalid index: {}, max: {}".format(index, self.total_num_instances) + ) + + def __len__(self): + """ + Length of this dataset is the sum of individual datasets + """ + return self.total_num_instances + + def __getitem__(self, index): + new_index, key = self._map_index(index) + try: + item = self.datasets[key][new_index] + item["full_id"] = index + return item + except Exception as e: + e.args = (f"Error from {key} dataset", *e.args) + raise + + def collater(self, samples): + """ + If we are doing batch sampling, then pick the right collater to use. + + Otherwise we assume all collaters are the same. 
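+
+        Note that the returned value is a dict keyed by corpus name, where each
+        entry is the mini-batch produced by that corpus' own collater.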
+ """ + if len(samples) == 0: + return None + + samples_dict = {key: [] for key in self.datasets} + for s in samples: + _, key = self._map_index(s["full_id"]) + samples_dict[key].append(s) + + batch = {} + for key in samples_dict: + if len(samples_dict[key]) == 0: + continue + batch[key] = self.datasets[key].collater(samples_dict[key]) + + return batch + + + def num_tokens(self, index: int): + index, key = self._map_index(index) + return self.datasets[key].num_tokens(index) + + def size(self, index: int): + index, key = self._map_index(index) + return self.datasets[key].size(index) + + @property + def can_reuse_epoch_itr_across_epochs(self): + return False + + def set_epoch(self, epoch, **unused): + super().set_epoch(epoch) + logger.info(f"setting epoch of multi_corpus_dataset to {epoch}") + for ds in self.dataset_list: + if hasattr(ds, "set_epoch"): + ds.set_epoch(epoch) + self.epoch = epoch + + @property + def supports_prefetch(self): + return False + + @property + def supports_fetch_outside_dataloader(self): + return all( + self.datasets[key].supports_fetch_outside_dataloader + for key in self.datasets + ) + + + def batch_by_size( + self, + indices, + max_tokens=None, + max_sentences=None, + required_batch_size_multiple=1, + ): + dataset_indices = indices + batches_dict = {} + for n, key in enumerate(dataset_indices): + max_tokens_ratio = self.max_tokens_ratio[n] + if isinstance(dataset_indices[key][0], np.ndarray): # chunked audio data + cur_batches = self.datasets[key].batch_by_size( + dataset_indices[key], + round(max_tokens * max_tokens_ratio), + max_sentences, + required_batch_size_multiple, + ) + logger.info(f"Created {sum([len(b) for b in cur_batches])} [{len(cur_batches)}] batches for dataset {key}") + else: + cur_batches = super().batch_by_size( + np.array(dataset_indices[key], dtype=np.int64), + round(max_tokens * max_tokens_ratio), + max_sentences, + required_batch_size_multiple, + ) + logger.info(f"Created {len(cur_batches)} batches for dataset {key}") + batches_dict[key] = cur_batches + + return batches_dict + + + def get_batch_sampler( + self, + indices, + num_shards, + seed, + max_tokens=None, + max_sentences=None, + required_batch_size_multiple=1, + split_modality_batch=False, + ): + + def batch_sampler(dataset, epoch): + start = time.time() + batches_dict = dataset.batch_by_size( + indices, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + ) + logger.info(f"multi_corpus_dataset, batch_by_size took {time.time() - start}s") + start = time.time() + new_batches = [] + + ### shuffle inner group size, split into speech/text batches + shuffled_batches_list = [] + speech_batches = [] + ### we should specify the speech_batches because: we need concatenate different speech datasets + # (e.g. ltr or km) instead of loading them parellelly. 
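+            # Informal summary of the loop below: batches from datasets whose name
+            # starts with "speech" are pooled into speech_batches (except chunked audio
+            # data, which is shuffled by its own dataset), while each text dataset is
+            # bucket-shuffled on its own and trimmed to a multiple of num_shards so that
+            # every distributed shard later receives the same number of batches.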
+ for name, batches in batches_dict.items(): + if name.startswith("speech"): + if isinstance(batches[0], list): # chunked audio data + batches = self.datasets[name].shuffle_batches(list(batches), seed + epoch) + shuffled_batches_list.append(batches) + else: + batches = inner_bucket_shuffle(batches, seed+epoch, num_shards*10) + batches = batches[: (len(batches) // num_shards) * num_shards] + if len(batches) == 0: + logger.warning(f"Sample 0 batch for {name}, you should ensure that no {name} data provided.") + else: + speech_batches += batches + else: + batches = inner_bucket_shuffle(batches, seed+epoch, num_shards*10) + batches = batches[: (len(batches) // num_shards) * num_shards] + if len(batches) == 0: + logger.warning(f"Sample 0 batch for {name}, you should ensure that no {name} data provided.") + else: + batches = shuffle_buckets(batches, seed=seed+epoch, inner_shuf=False) + shuffled_batches_list.append(batches) + if len(speech_batches) > 0: + speech_batches = shuffle_buckets(speech_batches, seed=seed+epoch, inner_shuf=False) + shuffled_batches_list.append(speech_batches) + + ### create the final new_batches + num_batch = min(len(batches) for batches in shuffled_batches_list) + if split_modality_batch: + for i in range(0, num_batch, num_shards): + for batches in shuffled_batches_list: + new_batches += batches[i: i + num_shards] + else: + for i in range(num_batch): + new_batches.append(np.concatenate([batches[i] for batches in shuffled_batches_list])) + + logger.info(f"multi_corpus_dataset sample {len(new_batches)} batches, took {time.time() - start}s") + return new_batches + + def inner_bucket_shuffle(batches, seed, bucket_size=10, thr=0): + """we assert batches is sorted form long to short. + shuffle samples in a buctet(e.g. 10 batches). + batches: a list of numpy array""" + num_batch = len(batches) + new_batches = [] + num_buckets = len(batches) // bucket_size + i = 0 + while i < num_batch: + if (i < bucket_size * thr or + i >= bucket_size * (num_buckets - thr) + ): + new_batches.append(batches[i]) + i += 1 + else: + group = np.concatenate(batches[i: i+bucket_size]) + with data_utils.numpy_seed(seed): + np.random.shuffle(group) + new_batches += np.array_split(group, bucket_size) + i += bucket_size + assert all([len(batch) > 0 for batch in new_batches]) + return new_batches + + def shuffle_buckets(batches, seed, inner_shuf=True): + if inner_shuf: + batches = inner_bucket_shuffle(batches, seed, num_shards*10) + batches = [batches[i: i + num_shards] for i in range(0, len(batches)-num_shards+1, num_shards)] + assert len(batches[-1]) == num_shards + new_batches = [] + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + for group in batches: + new_batches += group + return new_batches + + return batch_sampler diff --git a/SpeechT5/SpeechLM/speechlm/data/text_to_unit_dataset.py b/SpeechT5/SpeechLM/speechlm/data/text_to_unit_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..f7671d0fe48e695dc008742842f1550f48d741bc --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data/text_to_unit_dataset.py @@ -0,0 +1,293 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see 
LICENSE for details] +# ---------------------------------------------------------------------------- + +from pathlib import Path +from typing import List, Dict, Optional, Any +from dataclasses import dataclass + +import numpy as np +import torch + +from fairseq.data.audio.speech_to_text_dataset import ( + SpeechToTextDataset, + SpeechToTextDatasetCreator, + S2TDataConfig, + _collate_frames, + get_features_or_waveform, +) +from fairseq.data import Dictionary, data_utils as fairseq_data_utils + + +@dataclass +class TextToUnitDatasetItem(object): + index: int + source: torch.Tensor + target: Optional[torch.Tensor] = None + speaker_id: Optional[int] = None + speaker_emb: Optional[torch.Tensor] = None + duration: Optional[torch.Tensor] = None + pitch: Optional[torch.Tensor] = None + energy: Optional[torch.Tensor] = None + + +class Text2UnitDataset(SpeechToTextDataset): + def __init__( + self, + split: str, + is_train_split: bool, + cfg: S2TDataConfig, + unit_labels: List[str], + n_frames: List[int], + src_texts: Optional[List[str]] = None, + tgt_texts: Optional[List[str]] = None, + speakers: Optional[List[str]] = None, + src_langs: Optional[List[str]] = None, + tgt_langs: Optional[List[str]] = None, + ids: Optional[List[str]] = None, + tgt_dict: Optional[Dictionary] = None, + pre_tokenizer=None, + bpe_tokenizer=None, + n_frames_per_step=1, + speaker_to_id=None, + durations: Optional[List[List[int]]] = None, + pitches: Optional[List[str]] = None, + energies: Optional[List[str]] = None, + ): + super(Text2UnitDataset, self).__init__( + split, + is_train_split, + cfg, + unit_labels, + n_frames, + src_texts=src_texts, + tgt_texts=tgt_texts, + speakers=speakers, + src_langs=src_langs, + tgt_langs=tgt_langs, + ids=ids, + tgt_dict=tgt_dict, + pre_tokenizer=pre_tokenizer, + bpe_tokenizer=bpe_tokenizer, + n_frames_per_step=n_frames_per_step, + speaker_to_id=speaker_to_id, + ) + self.durations = durations + self.pitches = pitches + self.energies = energies + self.unit_labels = unit_labels + self.feature_root = Path(cfg.audio_root) + self.spk_emb_type = cfg.config.get("speaker_embedding_type", None) + self.random_spk = cfg.config.get("random_speaker", False) + if self.spk_emb_type is not None: + self.spk_emb_choices = [i for i in (self.feature_root / self.spk_emb_type).glob("*.npy")] + self.spk_emb_num = len(self.spk_emb_choices) + + def __getitem__(self, index: int) -> TextToUnitDatasetItem: + # s2t_item = super().__getitem__(index) + source = torch.LongTensor(self.unit_labels[index]) + target = None + if self.tgt_texts is not None: + tokenized = self.get_tokenized_tgt_text(index) + target = self.tgt_dict.encode_line( + tokenized, add_if_not_exist=False, append_eos=self.append_eos + ).long() + if self.cfg.prepend_tgt_lang_tag: + lang_tag_idx = self.get_lang_tag_idx( + self.tgt_langs[index], self.tgt_dict + ) + target = torch.cat((torch.LongTensor([lang_tag_idx]), target), 0) + + speaker_id = None + if self.speaker_to_id is not None: + speaker_id = self.speaker_to_id[self.speakers[index]] + + speaker_emb = None + if self.spk_emb_type is not None: + if self.random_spk: + spk_emb_path = self.spk_emb_choices[np.random.choice(self.spk_emb_num)] + else: + spk_emb_path = self.feature_root / self.spk_emb_type / f"{self.ids[index]}.npy" + speaker_emb = get_features_or_waveform(spk_emb_path) + speaker_emb = torch.from_numpy(speaker_emb).float() + + duration, pitch, energy = None, None, None + if self.durations is not None: + duration = torch.tensor( + self.durations[index] + [0], dtype=torch.long # pad 0 for EOS + 
) + if self.pitches is not None: + pitch = get_features_or_waveform(self.pitches[index]) + pitch = torch.from_numpy( + np.concatenate((pitch, [0])) # pad 0 for EOS + ).float() + if self.energies is not None: + energy = get_features_or_waveform(self.energies[index]) + energy = torch.from_numpy( + np.concatenate((energy, [0])) # pad 0 for EOS + ).float() + return TextToUnitDatasetItem( + index=index, + source=source, + target=target, + speaker_id=speaker_id, + speaker_emb=speaker_emb, + duration=duration, + pitch=pitch, + energy=energy, + ) + + def collater(self, samples: List[TextToUnitDatasetItem]) -> Dict[str, Any]: + if len(samples) == 0: + return {} + + src_lengths, order = torch.tensor( + [s.target.shape[0] for s in samples], dtype=torch.long + ).sort(descending=True) + id_ = torch.tensor([s.index for s in samples], dtype=torch.long).index_select( + 0, order + ) + traget = fairseq_data_utils.collate_tokens( + [s.source for s in samples], + self.tgt_dict.pad(), + ).index_select(0, order) + + target_lengths = torch.tensor( + [s.source.shape[0] for s in samples], dtype=torch.long + ).index_select(0, order) + + src_tokens = fairseq_data_utils.collate_tokens( + [s.target for s in samples], + self.tgt_dict.pad(), + self.tgt_dict.eos(), + left_pad=False, + move_eos_to_beginning=False, + ).index_select(0, order) + + speaker = None + if self.speaker_to_id is not None: + speaker = ( + torch.tensor([s.speaker_id for s in samples], dtype=torch.long) + .index_select(0, order) + .view(-1, 1) + ) + if self.spk_emb_type is not None: + speaker = torch.stack([s.speaker_emb for s in samples], dim=0).index_select(0, order) + + bsz, _ = traget.size() + prev_output_tokens = torch.cat( + (traget.new_zeros((bsz, self.tgt_dict.bos())), traget[:, :-1]), dim=1 + ) + + durations, pitches, energies = None, None, None + if self.durations is not None: + durations = fairseq_data_utils.collate_tokens( + [s.duration for s in samples], 0 + ).index_select(0, order) + assert src_tokens.shape[1] == durations.shape[1] + if self.pitches is not None: + pitches = _collate_frames([s.pitch for s in samples], True) + pitches = pitches.index_select(0, order) + assert src_tokens.shape[1] == pitches.shape[1] + if self.energies is not None: + energies = _collate_frames([s.energy for s in samples], True) + energies = energies.index_select(0, order) + assert src_tokens.shape[1] == energies.shape[1] + src_texts = [self.tgt_dict.string(samples[i].target) for i in order] + + return { + "id": id_, + "net_input": { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + "prev_output_tokens": prev_output_tokens, + }, + "speaker": speaker, + "target": traget, + "durations": durations, + "pitches": pitches, + "energies": energies, + "target_lengths": target_lengths, + "ntokens": sum(target_lengths).item(), + "nsentences": len(samples), + "src_texts": src_texts, + } + + +class Text2UnitDatasetCreator(SpeechToTextDatasetCreator): + KEY_DURATION = "duration" + KEY_PITCH = "pitch" + KEY_ENERGY = "energy" + KEY_UNIT = "unit" + + @classmethod + def _from_list( + cls, + split_name: str, + is_train_split, + samples: List[Dict], + cfg: S2TDataConfig, + tgt_dict, + pre_tokenizer, + bpe_tokenizer, + n_frames_per_step, + speaker_to_id, + ) -> Text2UnitDataset: + audio_root = Path(cfg.audio_root) + ids = [s[cls.KEY_ID] for s in samples] + # audio_paths = [(audio_root / s[cls.KEY_AUDIO]).as_posix() for s in samples] + unit_labels = [s[cls.KEY_UNIT] for s in samples] + unit_labels = [ + None if dd is None else [int(d) for d in dd.split(" ")] for dd in 
unit_labels + ] + n_frames = [int(s[cls.KEY_N_FRAMES]) for s in samples] + tgt_texts = [s[cls.KEY_TGT_TEXT] for s in samples] + src_texts = [s.get(cls.KEY_SRC_TEXT, cls.DEFAULT_SRC_TEXT) for s in samples] + speakers = [s.get(cls.KEY_SPEAKER, cls.DEFAULT_SPEAKER) for s in samples] + src_langs = [s.get(cls.KEY_SRC_LANG, cls.DEFAULT_LANG) for s in samples] + tgt_langs = [s.get(cls.KEY_TGT_LANG, cls.DEFAULT_LANG) for s in samples] + + durations = [s.get(cls.KEY_DURATION, None) for s in samples] + durations = [ + None if dd is None else [int(d) for d in dd.split(" ")] for dd in durations + ] + durations = None if any(dd is None for dd in durations) else durations + + pitches = [s.get(cls.KEY_PITCH, None) for s in samples] + pitches = [ + None if pp is None else (audio_root / pp).as_posix() for pp in pitches + ] + pitches = None if any(pp is None for pp in pitches) else pitches + + energies = [s.get(cls.KEY_ENERGY, None) for s in samples] + energies = [ + None if ee is None else (audio_root / ee).as_posix() for ee in energies + ] + energies = None if any(ee is None for ee in energies) else energies + + return Text2UnitDataset( + split_name, + is_train_split, + cfg, + unit_labels, + n_frames, + src_texts, + tgt_texts, + speakers, + src_langs, + tgt_langs, + ids, + tgt_dict, + pre_tokenizer, + bpe_tokenizer, + n_frames_per_step, + speaker_to_id, + durations, + pitches, + energies, + ) diff --git a/SpeechT5/SpeechLM/speechlm/data_process/covost2/mp3_to_wav.py b/SpeechT5/SpeechLM/speechlm/data_process/covost2/mp3_to_wav.py new file mode 100644 index 0000000000000000000000000000000000000000..7d8056879637947467aa5e8a3c466129c590eecf --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/covost2/mp3_to_wav.py @@ -0,0 +1,42 @@ +import argparse +from tqdm import tqdm +from pydub import AudioSegment +import torchaudio +import os + +def mp3_convert_wav(mp3_file, wav_file): + try: + sound = AudioSegment.from_mp3(mp3_file) + sound=sound.set_frame_rate(16000) + sound=sound.set_channels(1) + sound=sound.set_sample_width(2) + sound.export(wav_file, format="wav") + except Exception as e: + print(e) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--input", "-i", required=True, type=str) + parser.add_argument("--shard", "-n", required=True, type=int) + parser.add_argument("--rank", "-r", required=True, type=int) + args = parser.parse_args() + + assert args.rank < args.shard, f"rank: {args.rank} >= shard: {args.shard}" + + with open(args.input, 'r') as f: + files = [line.strip() for line in f ] + + mp3_files = files[args.rank::args.shard] + for mp3_file in tqdm(mp3_files): + wav_file = mp3_file.replace("/clips/", "/wav/").replace(".mp3", ".wav") + if os.path.exists(wav_file): + try: + torchaudio.info(wav_file) + except Exception as e: + print(e) + mp3_convert_wav(mp3_file, wav_file) + else: + mp3_convert_wav(mp3_file, wav_file) + +if __name__ == "__main__": + main() diff --git a/SpeechT5/SpeechLM/speechlm/data_process/covost2/prepare_covost_data.py b/SpeechT5/SpeechLM/speechlm/data_process/covost2/prepare_covost_data.py new file mode 100644 index 0000000000000000000000000000000000000000..687bc9f81922745e8872d3a998a04bc8d9c589ca --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/covost2/prepare_covost_data.py @@ -0,0 +1,295 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: 
https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- +""" +Modified from: https://github.com/facebookresearch/fairseq/blob/272c4c5197250997148fb12c0db6306035f166a4/examples/speech_to_text/prep_covost_data.py +1. normalize the punctuation +2. instead of extract fbank features, we direcly use 16k-Hz waveform +""" +import argparse +import logging +from pathlib import Path +from tempfile import NamedTemporaryFile +from typing import Optional, Tuple + +import pandas as pd +import torchaudio +from examples.speech_to_text.data_utils import ( + filter_manifest_df, + gen_config_yaml, + gen_vocab, + load_df_from_tsv, + save_df_to_tsv, +) +from torch import Tensor +from torch.utils.data import Dataset +from torchaudio.datasets.utils import download_url, extract_archive +from tqdm import tqdm +from pydub import AudioSegment +import soundfile as sf +import sacremoses + +log = logging.getLogger(__name__) + + +MANIFEST_COLUMNS = ["id", "audio", "n_frames", "tgt_text"] + + +def mp3_convert_wav(mp3_file, wav_file): + sound = AudioSegment.from_mp3(mp3_file) + sound=sound.set_frame_rate(16000) + sound=sound.set_channels(1) + sound=sound.set_sample_width(2) + sound.export(wav_file, format="wav") + +class CoVoST(Dataset): + """Create a Dataset for CoVoST (https://github.com/facebookresearch/covost). + + Args: + root (str): root path to the dataset and generated manifests/features + source_language (str): source (audio) language + target_language (str, optional): target (text) language, + None for no translation (default: None) + version (int, optional): CoVoST version. (default: 2) + download (bool, optional): Whether to download the dataset if it is not + found at root path. (default: ``False``). + """ + + COVOST_URL_TEMPLATE = ( + "https://dl.fbaipublicfiles.com/covost/" + "covost_v2.{src_lang}_{tgt_lang}.tsv.tar.gz" + ) + + VERSIONS = {2} + SPLITS = ["train", "dev", "test"] + + XX_EN_LANGUAGES = { + 1: ["fr", "de", "nl", "ru", "es", "it", "tr", "fa", "sv-SE", "mn", "zh-CN"], + 2: [ + "fr", + "de", + "es", + "ca", + "it", + "ru", + "zh-CN", + "pt", + "fa", + "et", + "mn", + "nl", + "tr", + "ar", + "sv-SE", + "lv", + "sl", + "ta", + "ja", + "id", + "cy", + ], + } + EN_XX_LANGUAGES = { + 1: [], + 2: [ + "de", + "tr", + "fa", + "sv-SE", + "mn", + "zh-CN", + "cy", + "ca", + "sl", + "et", + "id", + "ar", + "ta", + "lv", + "ja", + ], + } + + def __init__( + self, + root: str, + split: str, + source_language: str, + target_language: Optional[str] = None, + version: int = 2, + ) -> None: + assert version in self.VERSIONS and split in self.SPLITS + assert source_language is not None + self.no_translation = target_language is None + if not self.no_translation: + assert "en" in {source_language, target_language} + if source_language == "en": + assert target_language in self.EN_XX_LANGUAGES[version] + else: + assert source_language in self.XX_EN_LANGUAGES[version] + else: + # Hack here so that we can get "split" column from CoVoST TSV. + # Note that we use CoVoST train split for ASR which is an extension + # to Common Voice train split. 
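+            # Illustrative case (hypothetical invocation): for ASR on --src-lang fr with
+            # no --tgt-lang, self.no_translation stays True and the dummy "en" target
+            # below only selects covost_v2.fr_en.tsv.tar.gz, whose "split" column marks
+            # train/dev/test; the translation column is not used as the ASR target.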
+ target_language = "de" if source_language == "en" else "en" + + self.root: Path = Path(root) + + cv_tsv_path = self.root / "validated.tsv" + assert cv_tsv_path.is_file() + + covost_url = self.COVOST_URL_TEMPLATE.format( + src_lang=source_language, tgt_lang=target_language + ) + covost_archive = self.root / Path(covost_url).name + if not covost_archive.is_file(): + download_url(covost_url, self.root.as_posix(), hash_value=None) + extract_archive(covost_archive.as_posix()) + + cv_tsv = load_df_from_tsv(cv_tsv_path) + covost_tsv = load_df_from_tsv( + self.root / Path(covost_url).name.replace(".tar.gz", "") + ) + df = pd.merge( + left=cv_tsv[["path", "sentence", "client_id"]], + right=covost_tsv[["path", "translation", "split"]], + how="inner", + on="path", + ) + if split == "train": + df = df[(df["split"] == split) | (df["split"] == f"{split}_covost")] + else: + df = df[df["split"] == split] + data = df.to_dict(orient="index").items() + data = [v for k, v in sorted(data, key=lambda x: x[0])] + self.data = [] + for e in data: + try: + path = self.root / "clips" / e["path"] + _ = torchaudio.info(path.as_posix()) + self.data.append(e) + except RuntimeError: + pass + + self.normalizer = sacremoses.MosesPunctNormalizer( + lang=target_language, + pre_replace_unicode_punct=True, + post_remove_control_chars=True, + ) + + def __getitem__( + self, n: int + ) -> Tuple[Tensor, int, str, str, Optional[str], str, str]: + """Load the n-th sample from the dataset. + + Args: + n (int): The index of the sample to be loaded + + Returns: + tuple: ``(waveform, sample_rate, sentence, translation, speaker_id, + sample_id)`` + """ + data = self.data[n] + path = self.root / "clips" / data["path"] + # waveform, sample_rate = torchaudio.load(path) + sentence = data["sentence"] + translation = None if self.no_translation else data["translation"] + translation = self.normalizer.normalize(translation) + speaker_id = data["client_id"] + _id = data["path"].replace(".mp3", "") + return path, -1, sentence, translation, speaker_id, _id + + def __len__(self) -> int: + return len(self.data) + + +def process(args): + root = Path(args.data_root).absolute() / args.src_lang + outroot = root / f"{args.src_lang}-{args.tgt_lang}" + if args.vocab_type != "char": + outroot = root / f"{args.src_lang}-{args.tgt_lang}-{args.vocab_type}" + if not root.is_dir(): + raise NotADirectoryError(f"{root} does not exist") + #1. Extract featuress + # mp3-to-wav can take long long time, better run it externally with multi threads. + feature_root = root / "wav" + # feature_root.mkdir(exist_ok=True) + # for split in CoVoST.SPLITS: + # print(f"Fetching split {split}...") + # dataset = CoVoST(root, split, args.src_lang, args.tgt_lang) + # print("Converting mp3 to wav...") + # handle = open(root / f"{split}.id", "w") + # for waveform, _, _, _, _, utt_id in tqdm(dataset): + # wav_file = feature_root / f"{utt_id}.wav" + # print(waveform, file=handle) + # mp3_convert_wav(waveform, wav_file) + + #2. 
Generate TSV manifest + print("Generating manifest...") + train_text = [] + task = f"asr_{args.src_lang}" + if args.tgt_lang is not None: + task = f"st_{args.src_lang}_{args.tgt_lang}" + for split in CoVoST.SPLITS: + manifest = {c: [] for c in MANIFEST_COLUMNS} + dataset = CoVoST(root, split, args.src_lang, args.tgt_lang) + for waveform, _, src_utt, tgt_utt, speaker_id, utt_id in tqdm(dataset): + wav_file = feature_root / f"{utt_id}.wav" + manifest["id"].append(utt_id) + manifest["audio"].append(wav_file.as_posix().replace("/data/", "/mnt/default/")) + manifest["n_frames"].append(sf.info(wav_file).frames) + manifest["tgt_text"].append(src_utt if args.tgt_lang is None else tgt_utt) + is_train_split = split.startswith("train") + if is_train_split: + train_text.extend(manifest["tgt_text"]) + df = pd.DataFrame.from_dict(manifest) + df = filter_manifest_df(df, is_train_split=is_train_split, min_n_frames=320, max_n_frames=480000) + save_df_to_tsv(df, outroot / f"{split}_{task}.tsv") + # Generate vocab + vocab_size_str = "" if args.vocab_type == "char" else str(args.vocab_size) + spm_filename_prefix = f"spm_{args.vocab_type}{vocab_size_str}_{task}" + with NamedTemporaryFile(mode="w") as f: + for t in train_text: + f.write(t + "\n") + gen_vocab( + Path(f.name), + outroot / spm_filename_prefix, + args.vocab_type, + args.vocab_size + ) + # Generate config YAML + # gen_config_yaml( + # outroot, + # spm_filename=spm_filename_prefix + ".model", + # yaml_filename=f"config_{task}.yaml", + # specaugment_policy="lb", + # ) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--data-root", "-d", required=True, type=str, + help="data root with sub-folders for each language /" + ) + parser.add_argument( + "--vocab-type", + default="unigram", + required=True, + type=str, + choices=["bpe", "unigram", "char"], + ), + parser.add_argument("--vocab-size", default=1000, type=int) + parser.add_argument("--src-lang", "-s", required=True, type=str) + parser.add_argument("--tgt-lang", "-t", type=str) + args = parser.parse_args() + + process(args) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/SpeechLM/speechlm/data_process/filter_paireddata_by_len.py b/SpeechT5/SpeechLM/speechlm/data_process/filter_paireddata_by_len.py new file mode 100644 index 0000000000000000000000000000000000000000..ce09af333ae076f3d8088dd47bd0b57c3d720b3a --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/filter_paireddata_by_len.py @@ -0,0 +1,48 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import os +import argparse +from tqdm import tqdm +import numpy as np + + +lg_label = "__label__{}" + +def writefile(filename, lines): + with open(filename, 'w', encoding='utf-8') as f: + f.writelines(lines) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--input", "-i", required=True, type=str) + parser.add_argument("--output", "-o", required=True, type=str) + parser.add_argument("--src", "-s", required=True, type=str) + parser.add_argument("--tgt", "-t", required=True, type=str) + 
parser.add_argument("--max-len", "-m", default=2998, type=int) + args = parser.parse_args() + + src_lines, tgt_lines = [], [] + with open(f"{args.input}.{args.src}", 'r') as f1, open(f"{args.input}.{args.tgt}", 'r') as f2: + for src_line, tgt_line in tqdm(zip(f1, f2)): + src_len = len(src_line.strip().split()) + tgt_len = len(tgt_line.strip().split()) + if src_len < args.max_len and src_len > 0 and tgt_len < args.max_len and tgt_len > 0: + src_lines.append(src_line) + tgt_lines.append(tgt_line) + + writefile(f"{args.output}.{args.src}", src_lines) + writefile(f"{args.output}.{args.tgt}", tgt_lines) + +if __name__ == "__main__": + main() + + + diff --git a/SpeechT5/SpeechLM/speechlm/data_process/get_t2u_manifest.py b/SpeechT5/SpeechLM/speechlm/data_process/get_t2u_manifest.py new file mode 100644 index 0000000000000000000000000000000000000000..f60b34a1587df779ed17311ee9d257933ae9453e --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/get_t2u_manifest.py @@ -0,0 +1,119 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import argparse +import logging +from pathlib import Path +from collections import defaultdict + +import pandas as pd +import torchaudio +from tqdm import tqdm +import numpy as np +import torch + +from fairseq.data.audio.audio_utils import convert_waveform +from examples.speech_to_text.data_utils import save_df_to_tsv +from examples.speech_synthesis.data_utils import extract_pitch + + +log = logging.getLogger(__name__) + +def get_duration(fa_phone): + """fa_phone: force-aligned phone, 1-D numpy""" + same = np.concatenate(([True], fa_phone[:-1] != fa_phone[1:], [True])) + index = np.where(same)[0] + count = np.diff(index) + return count + + + +def process(args): + # assert "train" in args.splits + out_root = Path(args.output_root).absolute() + out_root.mkdir(exist_ok=True) + + print("Fetching data...") + audio_manifest_root = Path(args.audio_manifest_root).absolute() + for s in args.splits: + if args.add_pitch: + pitch_root = out_root / "pitch" / s + pitch_root.mkdir(exist_ok=True) + manifest = defaultdict(list) + with open(audio_manifest_root / f"{s}.audio.tsv") as f1, \ + open(audio_manifest_root / f"{s}.phn") as f2, \ + open(audio_manifest_root / f"{s}.km") as f3: + audio_root = f1.readline().strip() + audio_root = Path(audio_root) + for audio_path, fa_phone, fa_unit in tqdm(zip(f1, f2, f3)): + record = True + audio_path, n_frames = audio_path.strip().split("\t") + fa_phone = fa_phone.strip().split() + fa_unit = fa_unit.strip() + uttid = audio_path.split("/")[-1].split(".")[0] + speaker = uttid.split("-")[0] + + if args.add_duration: + assert len(fa_phone) == len(fa_unit.split()) + fa_phone = np.array(list(map(int, fa_phone))) + duration = get_duration(fa_phone) + reduced_phone = torch.LongTensor(fa_phone).unique_consecutive().numpy() + if args.add_pitch: + pitch_path = pitch_root / f"{uttid}.npy" + if not pitch_path.is_file(): + waveform, sample_rate = torchaudio.load(audio_root / audio_path) + waveform, sample_rate = convert_waveform( + waveform, sample_rate, 
normalize_volume=args.normalize_volume, + ) + pitch = extract_pitch( + waveform, sample_rate, None, + hop_length=args.hop_length, log_scale=True, + phoneme_durations=duration + ) + if pitch is not None: + np.save(pitch_path.as_posix(), pitch) + else: + record = False + else: + reduced_phone = fa_phone + + if record: + manifest["id"].append(uttid) + manifest["speaker"].append(speaker) + manifest["n_frames"].append(len(fa_unit.split())) + manifest["tgt_text"].append(" ".join(map(str, reduced_phone))) + manifest["unit"].append(fa_unit) + if args.add_duration: + manifest["duration"].append(" ".join(map(str, duration))) + if args.add_pitch: + manifest["pitch"].append(f"pitch/{s}/{uttid}.npy") + save_df_to_tsv( + pd.DataFrame.from_dict(manifest), + out_root / f"{s}.tsv" + ) + + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--audio-manifest-root", "-m", type=str) + parser.add_argument("--output-root", "-o", required=True, type=str) + parser.add_argument("--splits", "-s", type=str, nargs="+", + default=["train", "dev", "test"]) + parser.add_argument("--normalize-volume", "-n", action="store_true") + parser.add_argument("--hop-length", type=int, default=256) + parser.add_argument("--add-duration", action="store_true") + parser.add_argument("--add-pitch", action="store_true") + args = parser.parse_args() + + process(args) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/SpeechLM/speechlm/data_process/get_t2u_manifest_textonly.py b/SpeechT5/SpeechLM/speechlm/data_process/get_t2u_manifest_textonly.py new file mode 100644 index 0000000000000000000000000000000000000000..b332ecc9a8ca8f5899a7f4d3627aeb2611a768c2 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/get_t2u_manifest_textonly.py @@ -0,0 +1,67 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import argparse +import logging +from pathlib import Path +from collections import defaultdict + +import pandas as pd +from tqdm import tqdm +import numpy as np +from examples.speech_to_text.data_utils import save_df_to_tsv + + +log = logging.getLogger(__name__) + +def get_duration(fa_phone): + """fa_phone: force-aligned phone, 1-D numpy""" + same = np.concatenate(([True], fa_phone[:-1] != fa_phone[1:], [True])) + index = np.where(same)[0] + count = np.diff(index) + return count + +def process(args): + # assert "train" in args.splits + out_root = Path(args.output_root).absolute() + out_root.mkdir(exist_ok=True) + + print("Fetching data...") + audio_manifest_root = Path(args.audio_manifest_root).absolute() + for s in args.splits: + manifest = defaultdict(list) + with open(audio_manifest_root / f"{s}.phn") as f1: + for i, reduced_phone in tqdm(enumerate(f1)): + reduced_phone = reduced_phone.strip() + uttid = f"librilm-{i}" + speaker = uttid.split("-")[0] + + manifest["id"].append(uttid) + manifest["speaker"].append(speaker) + manifest["n_frames"].append(len(reduced_phone)) + manifest["tgt_text"].append(reduced_phone) + manifest["unit"].append(0) + save_df_to_tsv( + pd.DataFrame.from_dict(manifest), + out_root / 
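+            # one TSV per split with columns id, speaker, n_frames, tgt_text (reduced phones) and a placeholder unit column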
f"{s}.tsv" + ) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--audio-manifest-root", "-m", type=str) + parser.add_argument("--output-root", "-o", required=True, type=str) + parser.add_argument("--splits", "-s", type=str, nargs="+", + default=["train", "dev", "test"]) + parser.add_argument("--add-fastspeech-targets", action="store_true") + args = parser.parse_args() + + process(args) + +if __name__ == "__main__": + main() diff --git a/SpeechT5/SpeechLM/speechlm/data_process/phoneize_with_sil.py b/SpeechT5/SpeechLM/speechlm/data_process/phoneize_with_sil.py new file mode 100644 index 0000000000000000000000000000000000000000..6fcdd6c18c80b0171965b804e3b4bb9a7ead18e2 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/phoneize_with_sil.py @@ -0,0 +1,132 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +""" +Modified from https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4/examples/wav2vec/unsupervised/scripts/phonemize_with_sil.py +""" + +import argparse +import numpy as np +import sys +from g2p_en import G2p +from tqdm import tqdm +import logging +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=logging.INFO, +) +logger = logging.getLogger(__name__) + +def get_parser(): + parser = argparse.ArgumentParser( + description="converts words to phones adding optional silences around in between words" + ) + parser.add_argument( + "--sil-prob", + "-s", + type=float, + default=0, + help="probability of inserting silence between each word", + ) + parser.add_argument( + "--surround", + action="store_true", + help="if set, surrounds each example with silence", + ) + parser.add_argument( + "--lexicon", + help="lexicon to convert to phones", + required=True, + ) + parser.add_argument( + "--strict", + action="store_true", + help="if set, OOV words will raise a error (for train/valid set)", + ) + parser.add_argument( + "--input", + "-i", + help="input text file", + required=True, + ) + parser.add_argument( + "--output", + "-o", + help="input text file", + required=True, + ) + + + return parser + + +def normalize_phn(phons): + """ + convert g2p style phone to 39-phone set + """ + return [p.rstrip('0123456789') for p in phons] + + +def main(): + parser = get_parser() + args = parser.parse_args() + + sil_prob = args.sil_prob + surround = args.surround + sil = "" + + wrd_to_phn = {} + g2p = G2p() + + with open(args.lexicon, "r") as lf: + for line in lf: + items = line.rstrip().split() + assert len(items) > 1, line + assert items[0] not in wrd_to_phn, items + wrd_to_phn[items[0]] = items[1:] + + with open(args.input, "r") as fin, open(args.output, "w", encoding="utf-8") as fout: + for line in tqdm(fin): + words = line.strip().upper().split() + + if not all(w in wrd_to_phn for w in words): + if args.strict: + # logger.warning(f"| Warning: OOV words found: {line}") + pass + else: + continue + + phones = [] + if surround: + phones.append(sil) + + sample_sil_probs = None + 
if sil_prob > 0 and len(words) > 1: + sample_sil_probs = np.random.random(len(words) - 1) + + for i, w in enumerate(words): + if w in wrd_to_phn: + phones.extend(wrd_to_phn[w]) + else: + phones.extend(normalize_phn(g2p(w))) + if ( + sample_sil_probs is not None + and i < len(sample_sil_probs) + and sample_sil_probs[i] < sil_prob + ): + phones.append(sil) + + if surround: + phones.append(sil) + print(" ".join(phones), file=fout) + + +if __name__ == "__main__": + main() diff --git a/SpeechT5/SpeechLM/speechlm/data_process/phoneme_tokenizer/ltr2kaldi_phn_sil025.py b/SpeechT5/SpeechLM/speechlm/data_process/phoneme_tokenizer/ltr2kaldi_phn_sil025.py new file mode 100644 index 0000000000000000000000000000000000000000..014d0a29c1da416fead72a9235961143892d0326 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/phoneme_tokenizer/ltr2kaldi_phn_sil025.py @@ -0,0 +1,77 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import os +import tqdm +import argparse +import numpy as np + +parser = argparse.ArgumentParser() +parser.add_argument("--input", "-i", required=True, type=str) +parser.add_argument("--output", "-o", required=True, type=str) +parser.add_argument("--lexicon", default='align_lexicon.txt', type=str) +args = parser.parse_args() + +sil_prob = 0.25 + +if not os.path.exists(args.lexicon): + print(f"| Warning: lexicon {args.lexicon} not found, downloading ...") + try: + os.system(f"wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1QVeyCpLXLnujBUAickpo-jaSVY-vKLnT' -O {args.lexicon}") + except Exception as e: + print(e) + print(f"| Error downloading {args.lexicon}, please download it from https://drive.google.com/file/d/1QVeyCpLXLnujBUAickpo-jaSVY-vKLnT/view?usp=sharing") + exit(1) +dict = {} +f = open(args.lexicon) +for l in f: + dict[l.split()[0]] = l.split()[2:] + assert l.split()[0] == l.split()[1] + +f = open(args.input, 'r') +w_f = open(f'{args.output}.kaldi_phn_sil025', 'w') +w_oov = open(f'{args.output}.kaldi_phn_sil025.oov', 'w') + +oov_nums = 0 +total_nums = 0 +for l in tqdm.tqdm(f): + words = l.strip().replace(" ", "").split("|") + # words = l.strip().upper().split() + words = [w for w in words if w != ''] + + phones = [] + phones.extend(dict['!SIL']) + + sample_sil_probs = None + if sil_prob > 0 and len(words) > 1: + sample_sil_probs = np.random.random(len(words) - 1) + + for i, w in enumerate(words): + total_nums += 1 + if w not in dict: + w = '' + oov_nums += 1 + w_oov.write(w + '\n') + + phones.extend(dict[w]) + + if ( + sample_sil_probs is not None + and i < len(sample_sil_probs) + and sample_sil_probs[i] < sil_prob + ): + phones.extend(dict['!SIL']) + + phones.extend(dict['!SIL']) + w_f.write(' '.join(phones) + '\n') +w_oov.write(f'{oov_nums}\n') +print(f"OOV rate: {oov_nums}/{total_nums}") + +# !!! 
After processing, use this comand to adjust the SIL +### sed -i 's/SIL_S/SIL/g' your_file diff --git a/SpeechT5/SpeechLM/speechlm/data_process/phoneme_tokenizer/mean5_and_std25_sil14_spn32.dict b/SpeechT5/SpeechLM/speechlm/data_process/phoneme_tokenizer/mean5_and_std25_sil14_spn32.dict new file mode 100644 index 0000000000000000000000000000000000000000..b1957bee8f121f26709dd6d45a657698fd7c1f15 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/phoneme_tokenizer/mean5_and_std25_sil14_spn32.dict @@ -0,0 +1 @@ +{"SIL": [14, 7], "AE1_I": [5, 2.5], "P_I": [5, 2.5], "T_I": [5, 2.5], "ER0_E": [5, 2.5], "W_B": [5, 2.5], "AH1_I": [5, 2.5], "N_E": [5, 2.5], "M_B": [5, 2.5], "IH1_I": [5, 2.5], "S_I": [5, 2.5], "IH0_I": [5, 2.5], "Z_E": [5, 2.5], "R_B": [5, 2.5], "EY1_I": [5, 2.5], "CH_I": [5, 2.5], "AH0_I": [5, 2.5], "L_E": [5, 2.5], "L_B": [5, 2.5], "N_I": [5, 2.5], "D_E": [5, 2.5], "IH0_B": [5, 2.5], "S_B": [5, 2.5], "R_I": [5, 2.5], "AY1_I": [5, 2.5], "Z_I": [5, 2.5], "V_I": [5, 2.5], "JH_B": [5, 2.5], "T_E": [5, 2.5], "EH1_I": [5, 2.5], "R_E": [5, 2.5], "DH_B": [5, 2.5], "IY0_E": [5, 2.5], "AE1_B": [5, 2.5], "L_I": [5, 2.5], "IY2_E": [5, 2.5], "OW1_I": [5, 2.5], "D_B": [5, 2.5], "AW1_I": [5, 2.5], "UW1_E": [5, 2.5], "AH0_S": [5, 2.5], "HH_B": [5, 2.5], "AA1_I": [5, 2.5], "OW0_E": [5, 2.5], "F_B": [5, 2.5], "JH_I": [5, 2.5], "TH_E": [5, 2.5], "AO1_B": [5, 2.5], "D_I": [5, 2.5], "ER0_I": [5, 2.5], "AH0_B": [5, 2.5], "IY0_I": [5, 2.5], "IH1_B": [5, 2.5], "AA2_I": [5, 2.5], "S_E": [5, 2.5], "T_B": [5, 2.5], "ER1_I": [5, 2.5], "B_B": [5, 2.5], "AY1_E": [5, 2.5], "UH1_I": [5, 2.5], "K_E": [5, 2.5], "AO1_I": [5, 2.5], "W_I": [5, 2.5], "EY1_E": [5, 2.5], "AH1_E": [5, 2.5], "V_E": [5, 2.5], "OW1_B": [5, 2.5], "K_B": [5, 2.5], "TH_I": [5, 2.5], "B_I": [5, 2.5], "P_B": [5, 2.5], "Y_I": [5, 2.5], "UW1_I": [5, 2.5], "IH0_E": [5, 2.5], "IY1_E": [5, 2.5], "K_I": [5, 2.5], "AO2_I": [5, 2.5], "NG_E": [5, 2.5], "ER1_B": [5, 2.5], "TH_B": [5, 2.5], "IY1_I": [5, 2.5], "AE0_I": [5, 2.5], "AH0_E": [5, 2.5], "M_E": [5, 2.5], "N_B": [5, 2.5], "IY1_B": [5, 2.5], "DH_I": [5, 2.5], "G_I": [5, 2.5], "SH_I": [5, 2.5], "SH_B": [5, 2.5], "P_E": [5, 2.5], "AY1_S": [5, 2.5], "AA1_B": [5, 2.5], "EH1_B": [5, 2.5], "IH2_I": [5, 2.5], "AH1_B": [5, 2.5], "F_E": [5, 2.5], "AW1_B": [5, 2.5], "F_I": [5, 2.5], "EH2_I": [5, 2.5], "JH_E": [5, 2.5], "AY2_I": [5, 2.5], "EY2_E": [5, 2.5], "NG_I": [5, 2.5], "CH_E": [5, 2.5], "EY1_B": [5, 2.5], "AA0_B": [5, 2.5], "Y_B": [5, 2.5], "DH_E": [5, 2.5], "IY2_I": [5, 2.5], "V_B": [5, 2.5], "OY1_I": [5, 2.5], "UW0_E": [5, 2.5], "OW1_E": [5, 2.5], "G_B": [5, 2.5], "AE2_B": [5, 2.5], "M_I": [5, 2.5], "SH_E": [5, 2.5], "IH2_B": [5, 2.5], "AW1_E": [5, 2.5], "ZH_I": [5, 2.5], "ER0_S": [5, 2.5], "AY1_B": [5, 2.5], "AA0_I": [5, 2.5], "G_E": [5, 2.5], "EH0_B": [5, 2.5], "SPN_S": [32, 11], "UW2_I": [5, 2.5], "UW0_I": [5, 2.5], "EY2_I": [5, 2.5], "ER1_E": [5, 2.5], "OW2_I": [5, 2.5], "OW0_I": [5, 2.5], "HH_I": [5, 2.5], "B_E": [5, 2.5], "AO1_E": [5, 2.5], "AH2_B": [5, 2.5], "UH2_I": [5, 2.5], "OW1_S": [5, 2.5], "AO2_B": [5, 2.5], "OY1_E": [5, 2.5], "AE2_I": [5, 2.5], "AO0_B": [5, 2.5], "EH2_B": [5, 2.5], "EY1_S": [5, 2.5], "AE0_B": [5, 2.5], "ER0_B": [5, 2.5], "EH0_I": [5, 2.5], "EY0_I": [5, 2.5], "AW2_E": [5, 2.5], "AW2_I": [5, 2.5], "AY0_B": [5, 2.5], "AA2_B": [5, 2.5], "EY0_E": [5, 2.5], "AO0_I": [5, 2.5], "AY0_I": [5, 2.5], "AH2_I": [5, 2.5], "OW2_E": [5, 2.5], "ZH_E": [5, 2.5], "AY2_E": [5, 2.5], "ER2_I": [5, 2.5], "IY2_B": [5, 2.5], "AA1_S": [5, 2.5], "AA1_E": [5, 2.5], "OY0_I": [5, 2.5], "IY0_B": 
[5, 2.5], "OY2_E": [5, 2.5], "OW2_B": [5, 2.5], "AY0_E": [5, 2.5], "OY2_I": [5, 2.5], "UW1_B": [5, 2.5], "OY0_E": [5, 2.5], "UH0_I": [5, 2.5], "OY1_B": [5, 2.5], "AW0_B": [5, 2.5], "AO1_S": [5, 2.5], "OW0_B": [5, 2.5], "EH1_S": [5, 2.5], "AW0_I": [5, 2.5], "UW0_B": [5, 2.5], "AO2_E": [5, 2.5], "UW2_E": [5, 2.5], "L_S": [5, 2.5], "Z_B": [5, 2.5], "AA2_E": [5, 2.5], "EY0_B": [5, 2.5], "AY2_B": [5, 2.5], "AW0_E": [5, 2.5], "IY1_S": [5, 2.5], "EY2_B": [5, 2.5], "AH1_S": [5, 2.5], "IH2_E": [5, 2.5], "AW2_B": [5, 2.5], "AA0_E": [5, 2.5], "ER2_E": [5, 2.5], "ZH_B": [5, 2.5], "UH1_E": [5, 2.5], "EH1_E": [5, 2.5], "IH1_E": [5, 2.5], "ER1_S": [5, 2.5], "EH2_E": [5, 2.5], "AO0_E": [5, 2.5], "OY1_S": [5, 2.5], "AA_B": [5, 2.5], "AA_E": [5, 2.5], "AA_I": [5, 2.5], "AA_S": [5, 2.5], "AA0_S": [5, 2.5], "AA2_S": [5, 2.5], "AE_B": [5, 2.5], "AE_E": [5, 2.5], "AE_I": [5, 2.5], "AE_S": [5, 2.5], "AE0_E": [5, 2.5], "AE0_S": [5, 2.5], "AE1_E": [5, 2.5], "AE1_S": [5, 2.5], "AE2_E": [5, 2.5], "AE2_S": [5, 2.5], "AH_B": [5, 2.5], "AH_E": [5, 2.5], "AH_I": [5, 2.5], "AH_S": [5, 2.5], "AH2_E": [5, 2.5], "AH2_S": [5, 2.5], "AO_B": [5, 2.5], "AO_E": [5, 2.5], "AO_I": [5, 2.5], "AO_S": [5, 2.5], "AO0_S": [5, 2.5], "AO2_S": [5, 2.5], "AW_B": [5, 2.5], "AW_E": [5, 2.5], "AW_I": [5, 2.5], "AW_S": [5, 2.5], "AW0_S": [5, 2.5], "AW1_S": [5, 2.5], "AW2_S": [5, 2.5], "AY_B": [5, 2.5], "AY_E": [5, 2.5], "AY_I": [5, 2.5], "AY_S": [5, 2.5], "AY0_S": [5, 2.5], "AY2_S": [5, 2.5], "B_S": [5, 2.5], "CH_S": [5, 2.5], "D_S": [5, 2.5], "DH_S": [5, 2.5], "EH_B": [5, 2.5], "EH_E": [5, 2.5], "EH_I": [5, 2.5], "EH_S": [5, 2.5], "EH0_E": [5, 2.5], "EH0_S": [5, 2.5], "EH2_S": [5, 2.5], "ER_B": [5, 2.5], "ER_E": [5, 2.5], "ER_I": [5, 2.5], "ER_S": [5, 2.5], "ER2_B": [5, 2.5], "ER2_S": [5, 2.5], "EY_B": [5, 2.5], "EY_E": [5, 2.5], "EY_I": [5, 2.5], "EY_S": [5, 2.5], "EY0_S": [5, 2.5], "EY2_S": [5, 2.5], "F_S": [5, 2.5], "G_S": [5, 2.5], "HH_E": [5, 2.5], "HH_S": [5, 2.5], "IH_B": [5, 2.5], "IH_E": [5, 2.5], "IH_I": [5, 2.5], "IH_S": [5, 2.5], "IH0_S": [5, 2.5], "IH1_S": [5, 2.5], "IH2_S": [5, 2.5], "IY_B": [5, 2.5], "IY_E": [5, 2.5], "IY_I": [5, 2.5], "IY_S": [5, 2.5], "IY0_S": [5, 2.5], "IY2_S": [5, 2.5], "JH_S": [5, 2.5], "K_S": [5, 2.5], "M_S": [5, 2.5], "N_S": [5, 2.5], "NG_B": [5, 2.5], "NG_S": [5, 2.5], "OW_B": [5, 2.5], "OW_E": [5, 2.5], "OW_I": [5, 2.5], "OW_S": [5, 2.5], "OW0_S": [5, 2.5], "OW2_S": [5, 2.5], "OY_B": [5, 2.5], "OY_E": [5, 2.5], "OY_I": [5, 2.5], "OY_S": [5, 2.5], "OY0_B": [5, 2.5], "OY0_S": [5, 2.5], "OY2_B": [5, 2.5], "OY2_S": [5, 2.5], "P_S": [5, 2.5], "R_S": [5, 2.5], "S_S": [5, 2.5], "SH_S": [5, 2.5], "T_S": [5, 2.5], "TH_S": [5, 2.5], "UH_B": [5, 2.5], "UH_E": [5, 2.5], "UH_I": [5, 2.5], "UH_S": [5, 2.5], "UH0_B": [5, 2.5], "UH0_E": [5, 2.5], "UH0_S": [5, 2.5], "UH1_B": [5, 2.5], "UH1_S": [5, 2.5], "UH2_B": [5, 2.5], "UH2_E": [5, 2.5], "UH2_S": [5, 2.5], "UW_B": [5, 2.5], "UW_E": [5, 2.5], "UW_I": [5, 2.5], "UW_S": [5, 2.5], "UW0_S": [5, 2.5], "UW1_S": [5, 2.5], "UW2_B": [5, 2.5], "UW2_S": [5, 2.5], "V_S": [5, 2.5], "W_E": [5, 2.5], "W_S": [5, 2.5], "Y_E": [5, 2.5], "Y_S": [5, 2.5], "Z_S": [5, 2.5], "ZH_S": [5, 2.5]} diff --git a/SpeechT5/SpeechLM/speechlm/data_process/phoneme_tokenizer/repeat_withou_insert_sil_less_4375.py b/SpeechT5/SpeechLM/speechlm/data_process/phoneme_tokenizer/repeat_withou_insert_sil_less_4375.py new file mode 100644 index 0000000000000000000000000000000000000000..8c443611002a37318fea7c3d6ac42599c3ce3568 --- /dev/null +++ 
b/SpeechT5/SpeechLM/speechlm/data_process/phoneme_tokenizer/repeat_withou_insert_sil_less_4375.py @@ -0,0 +1,41 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import sys, json, tqdm +import numpy as np + +input_file = sys.argv[1] +mean_and_std_file = sys.argv[2] +out_file = sys.argv[3] + +mean_and_std = json.load(open(mean_and_std_file, 'r')) + +with open(input_file, 'r') as f, open(out_file, 'w') as w: + for line in tqdm.tqdm(f): + l = line.split() + + new_l = [] + for phn in l: + if phn not in mean_and_std: + mean_and_std[phn] = [5, 2.5] + print(f'unk phone {phn}') + n = max(1, round(np.random.normal(loc=mean_and_std[phn][0], scale=mean_and_std[phn][1]))) + new_l.extend([phn] * int(n)) + + minus = 0 + while len(new_l) >= 4375: + minus += 1 + new_l = [] + for phn in l: + n = max(1, round(mean_and_std[phn][0] - minus)) + new_l.extend([phn] * n) + print(f"too long line try minus {minus}") + + w.write(' '.join(new_l)+'\n') + diff --git a/SpeechT5/SpeechLM/speechlm/data_process/prepare_covost2_enxx.sh b/SpeechT5/SpeechLM/speechlm/data_process/prepare_covost2_enxx.sh new file mode 100644 index 0000000000000000000000000000000000000000..4d316a453582c8da003234385c709840f8dde855 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/prepare_covost2_enxx.sh @@ -0,0 +1,45 @@ + +#!/bin/bash +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" 
&& exit 1 +[ $# -lt 1 ] && echo "Usage: $0 [root=${PWD}/dataset/CommonVoice/v4]" && exit 0 +cwd=${PWD} +src=${PWD}/speechlm/data_process +lang=$1 +root=$2 +[ -z $root ] && root="${PWD}/dataset/CommonVoice/v4" +set -e -o pipefail -u + + +### step1, convert mp3 to wav +cd $root/en && mkdir -p wav +cut -f2 validated.tsv | sed '1d' | sed "s|^|${root}/en/clips/|" > validated.id +for i in $(seq 0 39); do + echo extracting $i; + python $src/covost2/mp3_to_wav.py -i validated.id -n 40 -r $i & +done +wait +cd $cwd + + +### step2, manifest +datadir="$root/en/en-$lang" && mkdir -p $datadir && cd $datadir +python /mnt/default/v-ziqzhang/code/stpretrain_scripts/data_process/covost2/prepare_covost_data.py --data-root $root --src-lang en --tgt-lang $lang --vocab-type char +mv ../*en_${lang}.* ./ + +# adjust config_base_en${lang}.yaml +echo "bpe_tokenizer:" > config_base_en${lang}.yaml +echo " bpe: sentencepiece" >> config_base_en${lang}.yaml +echo " sentencepiece_model: spm_char_st_en_de.model" >> config_base_en${lang}.yaml +echo "" >> config_base_en${lang}.yaml +echo "shuffle: false" >> config_base_en${lang}.yaml +echo "use_audio_input: true" >> config_base_en${lang}.yaml +echo "use_sample_rate: 16000" >> config_base_en${lang}.yaml +echo "standardize_audio: false" >> config_base_en${lang}.yaml +echo "vocab_filename: spm_char_st_en_de.txt" >> config_base_en${lang}.yaml +echo "" >> config_base_en${lang}.yaml +echo "# required by speech_to_text task but never used" >> config_base_en${lang}.yaml +echo "input_channels: 1" >> config_base_en${lang}.yaml +echo "input_feat_per_channel: 1" >> config_base_en${lang}.yaml +echo "" >> config_base_en${lang}.yaml +# adjust config_large_en${lang}.yaml +cat config_base_en${lang}.yaml | sed "s|standardize_audio: false|standardize_audio: true|" > config_large_en${lang}.yaml diff --git a/SpeechT5/SpeechLM/speechlm/data_process/prepare_phn2ltr_librilm.sh b/SpeechT5/SpeechLM/speechlm/data_process/prepare_phn2ltr_librilm.sh new file mode 100644 index 0000000000000000000000000000000000000000..9ffdf81adf2eef3e4548b7ed61cb02fce41bb205 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/prepare_phn2ltr_librilm.sh @@ -0,0 +1,57 @@ +#!/bin/bash +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" && exit 1 +cwd=${PWD} +src=${PWD}/speechlm/data_process + +set -e +mkdir -p dataset/LibriLM/phone_unit/tmp && cd dataset/LibriLM + +if [ ! -f librispeech-lm-norm.txt ]; then + echo "--------------------------------------------------------------------------------------" + echo "--------Downloading and unpacking librispeech-lm-norm.txt ..." + echo "--------------------------------------------------------------------------------------" + wget -c https://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz + gzip -d librispeech-lm-norm.txt.gz +fi + +# head -1000000 librispeech-lm-norm.txt > phone_unit/tmp/librispeech-lm-norm.txt +cd phone_unit/ + +echo "--------------------------------------------------------------------------------------" +echo "--------Tokenize the text..." +echo "--------------------------------------------------------------------------------------" +cat ../librispeech-lm-norm.txt | sed '1d' | python $src/wrd2ltr.py > tmp/librilm.ltr + +echo "--------------------------------------------------------------------------------------" +echo "--------Tokenize the text to the kaldi-style phonemes ..." 
+echo "--------------------------------------------------------------------------------------" +python $src/phoneme_tokenizer/ltr2kaldi_phn_sil025.py -i tmp/librilm.ltr -o tmp/librilm +cat tmp/librilm.kaldi_phn_sil025 | sed 's/SIL_S/SIL/g' > tmp/librilm.phn + +echo "--------------------------------------------------------------------------------------" +echo "--------Filter too long samples and up-sample phonemes ..." +echo "--------------------------------------------------------------------------------------" +python $src/filter_paireddata_by_len.py -i tmp/librilm -o tmp/librilm_l2k -s phn -t ltr -m 2000 +python $src/phoneme_tokenizer/repeat_withou_insert_sil_less_4375.py \ + tmp/librilm_l2k.phn \ + $src/phoneme_tokenizer/mean5_and_std25_sil14_spn32.dict \ + tmp/librilm_l2k_upsample.phn + +mv tmp/librilm_l2k.ltr tmp/librilm_l2k_upsample.ltr +python $src/filter_paireddata_by_len.py -i tmp/librilm_l2k_upsample -o train_text.phn-ltr -s phn -t ltr -m 2800 +### the max-length is set to filter the data, considering the batch size (in Large setting, 900,000/320 = 2812 tokens in a batch). + + +echo "--------------------------------------------------------------------------------------" +echo "--------Create binary files ..." +echo "--------------------------------------------------------------------------------------" +[ ! -f bin-idx/dict.phn.txt ] && echo "dict ${cwd}/dataset/LibriLM/bin-idx/dict.phn.txt not found!" && exit 1 +[ ! -f bin-idx/dict.ltr.txt ] && echo "dict ${cwd}/dataset/LibriLM/bin-idx/dict.ltr.txt not found!" && exit 1 +bash $src/txt2idx.sh train_text.phn-ltr.phn bin-idx bin-idx/dict.phn.txt +bash $src/txt2idx.sh train_text.phn-ltr.ltr bin-idx bin-idx/dict.ltr.txt + +rm -r tmp +cd - +echo "--------------------------------------------------------------------------------------" +echo "--------Done! files are in ${PWD}/dataset/LibriLM" +echo "--------------------------------------------------------------------------------------" diff --git a/SpeechT5/SpeechLM/speechlm/data_process/txt2idx.sh b/SpeechT5/SpeechLM/speechlm/data_process/txt2idx.sh new file mode 100644 index 0000000000000000000000000000000000000000..4442bf94cc481cc67694289c9d8b128a398d84c4 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/txt2idx.sh @@ -0,0 +1,30 @@ +#!/bin/bash +[ $# -lt 3 ] && echo "Usage: $0 " && exit 0 + +input=$1 +outdir=$2 +DICT=$3 +suffix=$4 +outname=${input##*/} +outname=${outname%.txt*} +[ -z $input ] && echo "You must specify a source file" && exit 1 + +[ -z $DICT ] && echo "No dict was specified!" && exit 1 +[ -z $outdir ] && outdir=${input%/*} +[ -z $outdir ] && outdir="." +[ ! 
-d $outdir ] && mkdir -p $outdir + +echo "------------------------------- creating idx/bin--------------------------------------------" +echo "$input --> $outdir/${outname}${suffix}.idx" +fairseq-preprocess \ + --only-source \ + --trainpref $input \ + --destdir $outdir \ + --thresholdsrc 0 \ + --srcdict ${DICT} \ + --workers 40 + +mv $outdir/train.idx $outdir/${outname}${suffix}.idx +mv $outdir/train.bin $outdir/${outname}${suffix}.bin +echo "----------------------------------- done --------------------------------------------" + diff --git a/SpeechT5/SpeechLM/speechlm/data_process/wrd2ltr.py b/SpeechT5/SpeechLM/speechlm/data_process/wrd2ltr.py new file mode 100644 index 0000000000000000000000000000000000000000..8aa48e62e6c4b302a73a3fc3656b9c78b7e06ea9 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/data_process/wrd2ltr.py @@ -0,0 +1,12 @@ +import sys + +def main(): + for line in sys.stdin: + line = line.replace("", "") + line = " ".join(line.strip().split()) + line = line.replace(" ", "|").upper() + "|" + print(" ".join(line)) + +if __name__ == "__main__": + main() + diff --git a/SpeechT5/SpeechLM/speechlm/generate_unit.py b/SpeechT5/SpeechLM/speechlm/generate_unit.py new file mode 100644 index 0000000000000000000000000000000000000000..690ea28c41021c278c927a79f4bf508229111e22 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/generate_unit.py @@ -0,0 +1,412 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +""" +Modified form: https://github.com/facebookresearch/fairseq/blob/272c4c5197250997148fb12c0db6306035f166a4/fairseq_cli/generate.py +""" + +import ast +import logging +import math +import os +import sys +from argparse import Namespace +from itertools import chain + +import numpy as np +import torch +from omegaconf import DictConfig + +from fairseq import checkpoint_utils, options, scoring, tasks, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.logging import progress_bar +from fairseq.logging.meters import StopwatchMeter, TimeMeter + + +def main(cfg: DictConfig): + + if isinstance(cfg, Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + assert cfg.common_eval.path is not None, "--path required for generation!" 
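+    # sampling-based generation requires nbest == beam, and --replace-unk only works with raw text datasets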
+ assert ( + not cfg.generation.sampling or cfg.generation.nbest == cfg.generation.beam + ), "--sampling requires --nbest to be equal to --beam" + assert ( + cfg.generation.replace_unk is None or cfg.dataset.dataset_impl == "raw" + ), "--replace-unk requires a raw text dataset (--dataset-impl=raw)" + + if cfg.common_eval.results_path is not None: + os.makedirs(cfg.common_eval.results_path, exist_ok=True) + output_path = os.path.join( + cfg.common_eval.results_path, + "generate-{}.txt".format(cfg.dataset.gen_subset), + ) + with open(output_path, "w", buffering=1, encoding="utf-8") as h: + return _main(cfg, h) + else: + return _main(cfg, sys.stdout) + + +def get_symbols_to_strip_from_output(generator): + if hasattr(generator, "symbols_to_strip_from_output"): + return generator.symbols_to_strip_from_output + else: + return {generator.eos} + + +def _main(cfg: DictConfig, output_file): + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=output_file, + ) + logger = logging.getLogger("fairseq_cli.generate") + + utils.import_user_module(cfg.common) + + if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None: + cfg.dataset.max_tokens = 12000 + logger.info(cfg) + + # Fix seed for stochastic decoding + if cfg.common.seed is not None and not cfg.generation.no_seed_provided: + np.random.seed(cfg.common.seed) + utils.set_torch_seed(cfg.common.seed) + + use_cuda = torch.cuda.is_available() and not cfg.common.cpu + + # Load dataset splits + task = tasks.setup_task(cfg.task) + + # Set dictionaries + try: + src_dict = getattr(task, "source_dictionary", None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + overrides = ast.literal_eval(cfg.common_eval.model_overrides) + + # Load ensemble + logger.info("loading model(s) from {}".format(cfg.common_eval.path)) + models, saved_cfg = checkpoint_utils.load_model_ensemble( + utils.split_paths(cfg.common_eval.path), + arg_overrides=overrides, + task=task, + suffix=cfg.checkpoint.checkpoint_suffix, + strict=(cfg.checkpoint.checkpoint_shard_count == 1), + num_shards=cfg.checkpoint.checkpoint_shard_count, + ) + + # loading the dataset should happen after the checkpoint has been loaded so we can give it the saved task config + task.load_dataset(cfg.dataset.gen_subset, task_cfg=saved_cfg.task) + + if cfg.generation.lm_path is not None: + overrides["data"] = cfg.task.data + + try: + lms, _ = checkpoint_utils.load_model_ensemble( + [cfg.generation.lm_path], arg_overrides=overrides, task=None + ) + except: + logger.warning( + f"Failed to load language model! 
Please make sure that the language model dict is the same " + f"as target dict and is located in the data dir ({cfg.task.data})" + ) + raise + + assert len(lms) == 1 + else: + lms = [None] + + # Optimize ensemble for generation + for model in chain(models, lms): + if model is None: + continue + if cfg.common.fp16: + model.half() + if use_cuda and not cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(cfg) + + def _fp_convert_sample(sample): + def apply_half(t): + if t.dtype is torch.float32: + return t.to(dtype=torch.half) + return t + + def apply_bfloat16(t): + if t.dtype is torch.float32: + return t.to(dtype=torch.bfloat16) + return t + + if cfg.common.fp16: + sample = utils.apply_to_sample(apply_half, sample) + + if cfg.common.bf16: + sample = utils.apply_to_sample(apply_bfloat16, sample) + + return sample + + # Load alignment dictionary for unknown word replacement + # (None if no unknown word replacement, empty if no path to align dictionary) + align_dict = utils.load_align_dict(cfg.generation.replace_unk) + + # Load dataset (possibly sharded) + itr = task.get_batch_iterator( + dataset=task.dataset(cfg.dataset.gen_subset), + max_tokens=cfg.dataset.max_tokens, + max_sentences=cfg.dataset.batch_size, + max_positions=utils.resolve_max_positions( + task.max_positions(), *[m.max_positions() for m in models] + ), + ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=cfg.dataset.required_batch_size_multiple, + seed=cfg.common.seed, + num_shards=cfg.distributed_training.distributed_world_size, + shard_id=cfg.distributed_training.distributed_rank, + num_workers=cfg.dataset.num_workers, + data_buffer_size=cfg.dataset.data_buffer_size, + ).next_epoch_itr(shuffle=False) + progress = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_interval=cfg.common.log_interval, + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + ) + + # Initialize generator + gen_timer = StopwatchMeter() + + extra_gen_cls_kwargs = {"lm_model": lms[0], "lm_weight": cfg.generation.lm_weight} + generator = task.build_generator( + models, cfg.generation, extra_gen_cls_kwargs=extra_gen_cls_kwargs + ) + + # Handle tokenization and BPE + tokenizer = task.build_tokenizer(cfg.tokenizer) + bpe = task.build_bpe(cfg.bpe) + + def decode_fn(x): + if bpe is not None: + x = bpe.decode(x) + if tokenizer is not None: + x = tokenizer.decode(x) + return x + + scorer = scoring.build_scorer(cfg.scoring, None) + + num_sentences = 0 + has_target = True + wps_meter = TimeMeter() + for sample in progress: + sample = utils.move_to_cuda(sample) if use_cuda else sample + sample = _fp_convert_sample(sample) + if "net_input" not in sample: + continue + + prefix_tokens = None + if cfg.generation.prefix_size > 0: + prefix_tokens = sample["target"][:, : cfg.generation.prefix_size] + + constraints = None + if "constraints" in sample: + constraints = sample["constraints"] + + gen_timer.start() + hypos = task.inference_step( + generator, + models[0], + sample, + prefix_tokens=prefix_tokens, + constraints=constraints, + ) + num_generated_tokens = sum(len(h["unit"]) for h in hypos) + gen_timer.stop(num_generated_tokens) + + for i, sample_id in enumerate(sample["id"].tolist()): + has_target = sample["target"] is not None + + # Remove padding + if "src_tokens" in sample["net_input"]: + src_tokens = utils.strip_pad( + sample["net_input"]["src_tokens"][i, :], tgt_dict.pad() + ).cpu() + else: + src_tokens = None + + 
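+            # un-pad the reference (when a target is available) so that scoring only sees real target tokens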
target_tokens = None + if has_target: + target_tokens = ( + utils.strip_pad(sample["target"][i, :], tgt_dict.pad()).cpu() + ) + + # Either retrieve the original sentences or regenerate them from tokens. + if align_dict is not None: + src_str = task.dataset(cfg.dataset.gen_subset).src.get_original_text( + sample_id + ) + target_str = task.dataset(cfg.dataset.gen_subset).tgt.get_original_text( + sample_id + ) + else: + if src_dict is not None: + src_str = src_dict.string(src_tokens, cfg.common_eval.post_process) + else: + src_str = "" + if has_target: + target_str = " ".join(map(str, target_tokens.numpy().tolist())) + + src_str = decode_fn(src_str) + + if not cfg.common_eval.quiet: + if src_dict is not None: + print("S-{}\t{}".format(sample_id, src_str), file=output_file) + if has_target: + print("T-{}\t{}".format(sample_id, target_str), file=output_file) + + # Process top predictions + j = 0 + hypo = hypos[i] + hypo_tokens = hypo["unit"].int().cpu() + hypo_str = " ".join(map(str, hypo_tokens.numpy().tolist())) + alignment = None + detok_hypo_str = hypo_str + # add duration prediction + hypo_duration = " ".join(map(str, hypo["duration"].int().cpu().numpy().tolist())) + hypo_fa_src_str = src_dict.string(hypo["fa_src"].cpu().numpy(), cfg.common_eval.post_process) + # hypo_fa_src_str = " ".join(map(str, hypo["fa_src"].int().cpu().numpy() - 4)) + + if not cfg.common_eval.quiet: + # score = hypo["score"] / math.log(2) # convert to base 2 + score = 0.00 + # original hypothesis (after tokenization and BPE) + # print( + # "H-{}\t{}\t{}".format(sample_id, score, hypo_str), + # file=output_file, + # ) + # detokenized hypothesis + print( + "D-{}\t{}\t{}".format(sample_id, score, detok_hypo_str), + file=output_file, + ) + # duration prediction + print( + "L-{}\t{}\t{}".format(sample_id, score, hypo_duration), + file=output_file, + ) + # force-aligned upsampled src-tokens + print( + "U-{}\t{}\t{}".format(sample_id, score, hypo_fa_src_str), + file=output_file, + ) + # print( + # "P-{}\t{}".format( + # sample_id, + # " ".join( + # map( + # lambda x: "{:.4f}".format(x), + # # convert from base e to base 2 + # hypo["positional_scores"] + # .div_(math.log(2)) + # .tolist(), + # ) + # ), + # ), + # file=output_file, + # ) + + if cfg.generation.print_alignment == "hard": + print( + "A-{}\t{}".format( + sample_id, + " ".join( + [ + "{}-{}".format(src_idx, tgt_idx) + for src_idx, tgt_idx in alignment + ] + ), + ), + file=output_file, + ) + if cfg.generation.print_alignment == "soft": + print( + "A-{}\t{}".format( + sample_id, + " ".join( + [",".join(src_probs) for src_probs in alignment] + ), + ), + file=output_file, + ) + + + # Score only the top hypothesis + if has_target and j == 0: + if hasattr(scorer, "add_string"): + scorer.add_string(target_str, detok_hypo_str) + else: + scorer.add(target_tokens, hypo_tokens) + + wps_meter.update(num_generated_tokens) + progress.log({"wps": round(wps_meter.avg)}) + num_sentences += ( + sample["nsentences"] if "nsentences" in sample else sample["id"].numel() + ) + + logger.info("NOTE: hypothesis and token scores are output in base 2") + logger.info( + "Translated {:,} sentences ({:,} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)".format( + num_sentences, + gen_timer.n, + gen_timer.sum, + num_sentences / gen_timer.sum, + 1.0 / gen_timer.avg, + ) + ) + if has_target: + if cfg.bpe and not cfg.generation.sacrebleu: + if cfg.common_eval.post_process: + logger.warning( + "BLEU score is being computed by splitting detokenized string on spaces, this is probably not what 
you want. Use --sacrebleu for standard 13a BLEU tokenization" + ) + else: + logger.warning( + "If you are using BPE on the target side, the BLEU score is computed on BPE tokens, not on proper words. Use --sacrebleu for standard 13a BLEU tokenization" + ) + # use print to be consistent with other main outputs: S-, H-, T-, D- and so on + print( + "Generate {} with beam={}: {}".format( + cfg.dataset.gen_subset, cfg.generation.beam, scorer.result_string() + ), + file=output_file, + ) + + return scorer + + +def cli_main(): + parser = options.get_generation_parser() + # TODO: replace this workaround with refactoring of `AudioPretraining` + parser.add_argument( + "--arch", + "-a", + metavar="ARCH", + default="wav2vec2", + help="Model architecture. For constructing tasks that rely on " + "model args (e.g. `AudioPretraining`)", + ) + args = options.parse_args_and_arch(parser) + main(args) + + +if __name__ == "__main__": + cli_main() diff --git a/SpeechT5/SpeechLM/speechlm/infer.py b/SpeechT5/SpeechLM/speechlm/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..ab80c15f8986233d004f3eae270e90c0cf1d5709 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/infer.py @@ -0,0 +1,486 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +""" +Modified form: https://github.com/facebookresearch/fairseq/blob/272c4c5197250997148fb12c0db6306035f166a4/examples/speech_recognition/new/infer.py +1. 
add "utils.import_user_module(cfg.common)" so that usr-dir can be loaded +""" + +import ast +import hashlib +import logging +import os +import shutil +import sys +from dataclasses import dataclass, field, is_dataclass +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union + +import editdistance +import torch +import torch.distributed as dist +import examples +from examples.speech_recognition.new.decoders.decoder_config import ( + DecoderConfig, + FlashlightDecoderConfig, +) +from examples.speech_recognition.new.decoders.decoder import Decoder +from fairseq import checkpoint_utils, distributed_utils, progress_bar, tasks, utils +from fairseq.data.data_utils import post_process +from fairseq.dataclass.configs import ( + CheckpointConfig, + CommonConfig, + CommonEvalConfig, + DatasetConfig, + DistributedTrainingConfig, + FairseqDataclass, +) +from fairseq.logging.meters import StopwatchMeter, TimeMeter +from fairseq.logging.progress_bar import BaseProgressBar +from fairseq.models.fairseq_model import FairseqModel +from omegaconf import OmegaConf + +import hydra +from hydra.core.config_store import ConfigStore + +logging.root.setLevel(logging.INFO) +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +config_path = Path(examples.speech_recognition.new.__path__[0]).resolve() / "conf" + + +@dataclass +class DecodingConfig(DecoderConfig, FlashlightDecoderConfig): + unique_wer_file: bool = field( + default=False, + metadata={"help": "If set, use a unique file for storing WER"}, + ) + results_path: Optional[str] = field( + default=None, + metadata={ + "help": "If set, write hypothesis and reference sentences into this directory" + }, + ) + + +@dataclass +class InferConfig(FairseqDataclass): + task: Any = None + decoding: DecodingConfig = DecodingConfig() + common: CommonConfig = CommonConfig() + common_eval: CommonEvalConfig = CommonEvalConfig() + checkpoint: CheckpointConfig = CheckpointConfig() + distributed_training: DistributedTrainingConfig = DistributedTrainingConfig() + dataset: DatasetConfig = DatasetConfig() + is_ax: bool = field( + default=False, + metadata={ + "help": "if true, assumes we are using ax for tuning and returns a tuple for ax to consume" + }, + ) + + +def reset_logging(): + root = logging.getLogger() + for handler in root.handlers: + root.removeHandler(handler) + root.setLevel(os.environ.get("LOGLEVEL", "INFO").upper()) + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter( + logging.Formatter( + fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + ) + root.addHandler(handler) + + +class InferenceProcessor: + cfg: InferConfig + + def __init__(self, cfg: InferConfig) -> None: + self.cfg = cfg + self.task = tasks.setup_task(cfg.task) + + models, saved_cfg = self.load_model_ensemble() + self.models = models + self.saved_cfg = saved_cfg + self.tgt_dict = self.task.target_dictionary + + self.task.load_dataset( + self.cfg.dataset.gen_subset, + task_cfg=saved_cfg.task, + ) + self.generator = Decoder(cfg.decoding, self.tgt_dict) + self.gen_timer = StopwatchMeter() + self.wps_meter = TimeMeter() + self.num_sentences = 0 + self.total_errors = 0 + self.total_length = 0 + + self.hypo_words_file = None + self.hypo_units_file = None + self.ref_words_file = None + self.ref_units_file = None + + self.progress_bar = self.build_progress_bar() + + def __enter__(self) -> "InferenceProcessor": + if self.cfg.decoding.results_path is not None: + self.hypo_words_file = 
self.get_res_file("hypo.word") + self.hypo_units_file = self.get_res_file("hypo.units") + self.ref_words_file = self.get_res_file("ref.word") + self.ref_units_file = self.get_res_file("ref.units") + return self + + def __exit__(self, *exc) -> bool: + if self.cfg.decoding.results_path is not None: + self.hypo_words_file.close() + self.hypo_units_file.close() + self.ref_words_file.close() + self.ref_units_file.close() + return False + + def __iter__(self) -> Any: + for sample in self.progress_bar: + if not self.cfg.common.cpu: + sample = utils.move_to_cuda(sample) + + # Happens on the last batch. + if "net_input" not in sample: + continue + yield sample + + def log(self, *args, **kwargs): + self.progress_bar.log(*args, **kwargs) + + def print(self, *args, **kwargs): + self.progress_bar.print(*args, **kwargs) + + def get_res_file(self, fname: str) -> None: + fname = os.path.join(self.cfg.decoding.results_path, fname) + if self.data_parallel_world_size > 1: + fname = f"{fname}.{self.data_parallel_rank}" + return open(fname, "w", buffering=1) + + def merge_shards(self) -> None: + """Merges all shard files into shard 0, then removes shard suffix.""" + + shard_id = self.data_parallel_rank + num_shards = self.data_parallel_world_size + + if self.data_parallel_world_size > 1: + + def merge_shards_with_root(fname: str) -> None: + fname = os.path.join(self.cfg.decoding.results_path, fname) + logger.info("Merging %s on shard %d", fname, shard_id) + base_fpath = Path(f"{fname}.0") + with open(base_fpath, "a") as out_file: + for s in range(1, num_shards): + shard_fpath = Path(f"{fname}.{s}") + with open(shard_fpath, "r") as in_file: + for line in in_file: + out_file.write(line) + shard_fpath.unlink() + shutil.move(f"{fname}.0", fname) + + dist.barrier() # ensure all shards finished writing + if shard_id == (0 % num_shards): + merge_shards_with_root("hypo.word") + if shard_id == (1 % num_shards): + merge_shards_with_root("hypo.units") + if shard_id == (2 % num_shards): + merge_shards_with_root("ref.word") + if shard_id == (3 % num_shards): + merge_shards_with_root("ref.units") + dist.barrier() + + def optimize_model(self, model: FairseqModel) -> None: + model.make_generation_fast_() + if self.cfg.common.fp16: + model.half() + if not self.cfg.common.cpu: + model.cuda() + + def load_model_ensemble(self) -> Tuple[List[FairseqModel], FairseqDataclass]: + arg_overrides = ast.literal_eval(self.cfg.common_eval.model_overrides) + models, saved_cfg = checkpoint_utils.load_model_ensemble( + utils.split_paths(self.cfg.common_eval.path, separator="\\"), + arg_overrides=arg_overrides, + task=self.task, + suffix=self.cfg.checkpoint.checkpoint_suffix, + strict=(self.cfg.checkpoint.checkpoint_shard_count == 1), + num_shards=self.cfg.checkpoint.checkpoint_shard_count, + ) + for model in models: + self.optimize_model(model) + return models, saved_cfg + + def get_dataset_itr(self, disable_iterator_cache: bool = False) -> None: + return self.task.get_batch_iterator( + dataset=self.task.dataset(self.cfg.dataset.gen_subset), + max_tokens=self.cfg.dataset.max_tokens, + max_sentences=self.cfg.dataset.batch_size, + max_positions=(sys.maxsize, sys.maxsize), + ignore_invalid_inputs=self.cfg.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=self.cfg.dataset.required_batch_size_multiple, + seed=self.cfg.common.seed, + num_shards=self.data_parallel_world_size, + shard_id=self.data_parallel_rank, + num_workers=self.cfg.dataset.num_workers, + data_buffer_size=self.cfg.dataset.data_buffer_size, + 
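+            # the eval set is sharded across data-parallel workers; per-shard error/length counts are all-reduced in main()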
disable_iterator_cache=disable_iterator_cache, + ).next_epoch_itr(shuffle=False) + + def build_progress_bar( + self, + epoch: Optional[int] = None, + prefix: Optional[str] = None, + default_log_format: str = "tqdm", + ) -> BaseProgressBar: + return progress_bar.progress_bar( + iterator=self.get_dataset_itr(), + log_format=self.cfg.common.log_format, + log_interval=self.cfg.common.log_interval, + epoch=epoch, + prefix=prefix, + tensorboard_logdir=self.cfg.common.tensorboard_logdir, + default_log_format=default_log_format, + ) + + @property + def data_parallel_world_size(self): + if self.cfg.distributed_training.distributed_world_size == 1: + return 1 + return distributed_utils.get_data_parallel_world_size() + + @property + def data_parallel_rank(self): + if self.cfg.distributed_training.distributed_world_size == 1: + return 0 + return distributed_utils.get_data_parallel_rank() + + def process_sentence( + self, + sample: Dict[str, Any], + hypo: Dict[str, Any], + sid: int, + batch_id: int, + ) -> Tuple[int, int]: + speaker = None # Speaker can't be parsed from dataset. + + if "target_label" in sample: + toks = sample["target_label"] + else: + toks = sample["target"] + toks = toks[batch_id, :] + + # Processes hypothesis. + hyp_pieces = self.tgt_dict.string(hypo["tokens"].int().cpu()) + if "words" in hypo: + hyp_words = " ".join(hypo["words"]) + else: + hyp_words = post_process(hyp_pieces, self.cfg.common_eval.post_process) + + # Processes target. + target_tokens = utils.strip_pad(toks, self.tgt_dict.pad()) + tgt_pieces = self.tgt_dict.string(target_tokens.int().cpu()) + tgt_words = post_process(tgt_pieces, self.cfg.common_eval.post_process) + + if self.cfg.decoding.results_path is not None: + print(f"{hyp_pieces} ({speaker}-{sid})", file=self.hypo_units_file) + print(f"{hyp_words} ({speaker}-{sid})", file=self.hypo_words_file) + print(f"{tgt_pieces} ({speaker}-{sid})", file=self.ref_units_file) + print(f"{tgt_words} ({speaker}-{sid})", file=self.ref_words_file) + + if not self.cfg.common_eval.quiet: + logger.info(f"HYPO: {hyp_words}") + logger.info(f"REF: {tgt_words}") + logger.info("---------------------") + + hyp_words, tgt_words = hyp_words.split(), tgt_words.split() + + return editdistance.eval(hyp_words, tgt_words), len(tgt_words) + + def process_sample(self, sample: Dict[str, Any]) -> None: + self.gen_timer.start() + hypos = self.task.inference_step( + generator=self.generator, + models=self.models, + sample=sample, + ) + num_generated_tokens = sum(len(h[0]["tokens"]) for h in hypos) + self.gen_timer.stop(num_generated_tokens) + self.wps_meter.update(num_generated_tokens) + + for batch_id, sample_id in enumerate(sample["id"].tolist()): + errs, length = self.process_sentence( + sample=sample, + sid=sample_id, + batch_id=batch_id, + hypo=hypos[batch_id][0], + ) + self.total_errors += errs + self.total_length += length + + self.log({"wps": round(self.wps_meter.avg)}) + if "nsentences" in sample: + self.num_sentences += sample["nsentences"] + else: + self.num_sentences += sample["id"].numel() + + def log_generation_time(self) -> None: + logger.info( + "Processed %d sentences (%d tokens) in %.1fs %.2f " + "sentences per second, %.2f tokens per second)", + self.num_sentences, + self.gen_timer.n, + self.gen_timer.sum, + self.num_sentences / (self.gen_timer.sum + 1e-6), + 1.0 / (self.gen_timer.avg + 1e-6), + ) + + +def parse_wer(wer_file: Path) -> float: + with open(wer_file, "r") as f: + return float(f.readline().strip().split(" ")[1]) + + +def get_wer_file(cfg: InferConfig) -> Path: + 
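+    # the returned path is read back by parse_wer() in hydra_main() once decoding has finished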
"""Hashes the decoding parameters to a unique file ID.""" + base_path = "wer" + if cfg.decoding.results_path is not None: + base_path = os.path.join(cfg.decoding.results_path, base_path) + + if cfg.decoding.unique_wer_file: + yaml_str = OmegaConf.to_yaml(cfg.decoding) + fid = int(hashlib.md5(yaml_str.encode("utf-8")).hexdigest(), 16) + return Path(f"{base_path}.{fid % 1000000}") + else: + return Path(base_path) + + +def main(cfg: InferConfig) -> float: + """Entry point for main processing logic. + + Args: + cfg: The inferance configuration to use. + wer: Optional shared memory pointer for returning the WER. If not None, + the final WER value will be written here instead of being returned. + + Returns: + The final WER if `wer` is None, otherwise None. + """ + + utils.import_user_module(cfg.common) + + yaml_str, wer_file = OmegaConf.to_yaml(cfg.decoding), get_wer_file(cfg) + + # Validates the provided configuration. + if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None: + cfg.dataset.max_tokens = 4000000 + if not cfg.common.cpu and not torch.cuda.is_available(): + raise ValueError("CUDA not found; set `cpu=True` to run without CUDA") + + logger.info(cfg.common_eval.path) + + with InferenceProcessor(cfg) as processor: + for sample in processor: + processor.process_sample(sample) + + processor.log_generation_time() + + if cfg.decoding.results_path is not None: + processor.merge_shards() + + errs_t, leng_t = processor.total_errors, processor.total_length + + if cfg.common.cpu: + logger.warning("Merging WER requires CUDA.") + elif processor.data_parallel_world_size > 1: + stats = torch.LongTensor([errs_t, leng_t]).cuda() + dist.all_reduce(stats, op=dist.ReduceOp.SUM) + errs_t, leng_t = stats[0].item(), stats[1].item() + + wer = errs_t * 100.0 / leng_t + + if distributed_utils.is_master(cfg.distributed_training): + with open(wer_file, "w") as f: + f.write( + ( + f"WER: {wer}\n" + f"err / num_ref_words = {errs_t} / {leng_t}\n\n" + f"{yaml_str}" + ) + ) + + return wer + + +@hydra.main(config_path=config_path, config_name="infer") +def hydra_main(cfg: InferConfig) -> Union[float, Tuple[float, Optional[float]]]: + container = OmegaConf.to_container(cfg, resolve=True, enum_to_str=True) + cfg = OmegaConf.create(container) + OmegaConf.set_struct(cfg, True) + + if cfg.common.reset_logging: + reset_logging() + + utils.import_user_module(cfg.common) + + # logger.info("Config:\n%s", OmegaConf.to_yaml(cfg)) + wer = float("inf") + + try: + if cfg.common.profile: + with torch.cuda.profiler.profile(): + with torch.autograd.profiler.emit_nvtx(): + distributed_utils.call_main(cfg, main) + else: + distributed_utils.call_main(cfg, main) + + wer = parse_wer(get_wer_file(cfg)) + except BaseException as e: # pylint: disable=broad-except + if not cfg.common.suppress_crashes: + raise + else: + logger.error("Crashed! 
%s", str(e)) + + logger.info("Word error rate: %.4f", wer) + if cfg.is_ax: + return wer, None + + return wer + + +def cli_main() -> None: + try: + from hydra._internal.utils import ( + get_args, + ) # pylint: disable=import-outside-toplevel + + cfg_name = get_args().config_name or "infer" + except ImportError: + logger.warning("Failed to get config name from hydra args") + cfg_name = "infer" + + cs = ConfigStore.instance() + cs.store(name=cfg_name, node=InferConfig) + + for k in InferConfig.__dataclass_fields__: + if is_dataclass(InferConfig.__dataclass_fields__[k].type): + v = InferConfig.__dataclass_fields__[k].default + cs.store(name=k, node=v) + + hydra_main() # pylint: disable=no-value-for-parameter + + +if __name__ == "__main__": + cli_main() diff --git a/SpeechT5/SpeechLM/speechlm/models/__init__.py b/SpeechT5/SpeechLM/speechlm/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SpeechT5/SpeechLM/speechlm/models/fasttext2unit.py b/SpeechT5/SpeechLM/speechlm/models/fasttext2unit.py new file mode 100644 index 0000000000000000000000000000000000000000..14c27b5ea21faff956cafdf96f0cc6fdb16e3a96 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/models/fasttext2unit.py @@ -0,0 +1,226 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import torch + +from fairseq import utils +from fairseq.models import ( + FairseqEncoderModel, + register_model, + register_model_architecture, +) +from fairseq.models.text_to_speech import fastspeech2 + +logger = logging.getLogger(__name__) + +class VarianceAdaptor(fastspeech2.VarianceAdaptor): + def __init__(self, args): + super().__init__(args) + self.use_pitch = args.use_pitch + self.use_energe = args.use_energe + + def forward( + self, + x, + padding_mask, + durations=None, + pitches=None, + energies=None, + d_factor=1.0, + p_factor=1.0, + e_factor=1.0, + ): + # x: B x T x C + log_dur_out = self.duration_predictor(x) + dur_out = torch.clamp( + torch.round((torch.exp(log_dur_out) - 1) * d_factor).long(), min=0 + ) + dur_out.masked_fill_(padding_mask, 0) + + if self.use_pitch: + pitch_out, pitch_emb = self.get_pitch_emb(x, pitches, p_factor) + x = x + pitch_emb + else: + pitch_out = None + + if self.use_energe: + energy_out, energy_emb = self.get_energy_emb(x, energies, e_factor) + x = x + energy_emb + else: + energy_out = None + + x, out_lens = self.length_regulator( + x, dur_out if durations is None else durations + ) + + return x, out_lens, log_dur_out, pitch_out, energy_out + + +class FastSpeech2Encoder(fastspeech2.FastSpeech2Encoder): + def __init__(self, args, src_dict, embed_speaker): + super().__init__(args, src_dict, embed_speaker) + self.var_adaptor = VarianceAdaptor(args) + self.apply(fastspeech2.model_init) + +@register_model("fasttext2unit") +class FastText2UnitModel(FairseqEncoderModel): + """ + Implementation for https://arxiv.org/abs/2006.04558 + """ + + NON_AUTOREGRESSIVE = True + + + @staticmethod + def add_args(parser): + 
parser.add_argument("--dropout", type=float) + parser.add_argument("--output-frame-dim", type=int) + parser.add_argument("--speaker-embed-dim", type=int) + # FFT blocks + parser.add_argument("--fft-hidden-dim", type=int) + parser.add_argument("--fft-kernel-size", type=int) + parser.add_argument("--attention-dropout", type=float) + parser.add_argument("--encoder-layers", type=int) + parser.add_argument("--encoder-embed-dim", type=int) + parser.add_argument("--encoder-attention-heads", type=int) + parser.add_argument("--decoder-layers", type=int) + parser.add_argument("--decoder-embed-dim", type=int) + parser.add_argument("--decoder-attention-heads", type=int) + # variance predictor + parser.add_argument("--var-pred-n-bins", type=int) + parser.add_argument("--var-pred-hidden-dim", type=int) + parser.add_argument("--var-pred-kernel-size", type=int) + parser.add_argument("--var-pred-dropout", type=float) + # postnet + parser.add_argument("--add-postnet", action="store_true") + parser.add_argument("--postnet-dropout", type=float) + parser.add_argument("--postnet-layers", type=int) + parser.add_argument("--postnet-conv-dim", type=int) + parser.add_argument("--postnet-conv-kernel-size", type=int) + # pitch & energe + parser.add_argument("--use-pitch", action="store_true") + parser.add_argument("--use-energe", action="store_true") + + + def __init__(self, encoder, args, src_dict): + super().__init__(encoder) + self._num_updates = 0 + + @classmethod + def build_model(cls, args, task): + embed_speaker = task.get_speaker_embeddings(args) + if args.output_frame_dim == -1: + args.output_frame_dim = len(task.tgt_dict) + encoder = FastSpeech2Encoder(args, task.src_dict, embed_speaker) + return cls(encoder, args, task.src_dict) + + def set_num_updates(self, num_updates): + super().set_num_updates(num_updates) + self._num_updates = num_updates + + def get_normalized_probs(self, net_output, log_probs, sample=None): + logits = net_output[0] + if log_probs: + return utils.log_softmax(logits.float(), dim=-1) + else: + return utils.softmax(logits.float(), dim=-1) + + +@register_model_architecture("fasttext2unit", "fasttext2unit_s") +def base_architecture(args): + args.dropout = getattr(args, "dropout", 0.2) + args.output_frame_dim = getattr(args, "output_frame_dim", -1) + args.speaker_embed_dim = getattr(args, "speaker_embed_dim", 256) + # FFT blocks + args.fft_hidden_dim = getattr(args, "fft_hidden_dim", 1024) + args.fft_kernel_size = getattr(args, "fft_kernel_size", 9) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.encoder_layers = getattr(args, "encoder_layers", 4) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 256) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 2) + args.decoder_layers = getattr(args, "decoder_layers", 4) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 256) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 2) + # variance predictor + args.var_pred_n_bins = getattr(args, "var_pred_n_bins", 256) + args.var_pred_hidden_dim = getattr(args, "var_pred_hidden_dim", 256) + args.var_pred_kernel_size = getattr(args, "var_pred_kernel_size", 3) + args.var_pred_dropout = getattr(args, "var_pred_dropout", 0.5) + # postnet + args.add_postnet = getattr(args, "add_postnet", False) + args.postnet_dropout = getattr(args, "postnet_dropout", 0.5) + args.postnet_layers = getattr(args, "postnet_layers", 5) + args.postnet_conv_dim = getattr(args, "postnet_conv_dim", 512) + args.postnet_conv_kernel_size = 
getattr(args, "postnet_conv_kernel_size", 5) + # pitch & energe + args.use_pitch = getattr(args, "use_pitch", False) + args.use_energe = getattr(args, "use_energe", False) + + +@register_model_architecture("fasttext2unit", "fasttext2unit_m") +def base_architecture(args): + args.dropout = getattr(args, "dropout", 0.2) + args.output_frame_dim = getattr(args, "output_frame_dim", -1) + args.speaker_embed_dim = getattr(args, "speaker_embed_dim", 256) + # FFT blocks + args.fft_hidden_dim = getattr(args, "fft_hidden_dim", 1024) + args.fft_kernel_size = getattr(args, "fft_kernel_size", 9) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 256) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 2) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 256) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 2) + # variance predictor + args.var_pred_n_bins = getattr(args, "var_pred_n_bins", 256) + args.var_pred_hidden_dim = getattr(args, "var_pred_hidden_dim", 256) + args.var_pred_kernel_size = getattr(args, "var_pred_kernel_size", 3) + args.var_pred_dropout = getattr(args, "var_pred_dropout", 0.5) + # postnet + args.add_postnet = getattr(args, "add_postnet", False) + args.postnet_dropout = getattr(args, "postnet_dropout", 0.5) + args.postnet_layers = getattr(args, "postnet_layers", 5) + args.postnet_conv_dim = getattr(args, "postnet_conv_dim", 512) + args.postnet_conv_kernel_size = getattr(args, "postnet_conv_kernel_size", 5) + # pitch & energe + args.use_pitch = getattr(args, "use_pitch", False) + args.use_energe = getattr(args, "use_energe", False) + + +@register_model_architecture("fasttext2unit", "fasttext2unit_l") +def base_architecture(args): + args.dropout = getattr(args, "dropout", 0.2) + args.output_frame_dim = getattr(args, "output_frame_dim", -1) + args.speaker_embed_dim = getattr(args, "speaker_embed_dim", 256) + # FFT blocks + args.fft_hidden_dim = getattr(args, "fft_hidden_dim", 1536) + args.fft_kernel_size = getattr(args, "fft_kernel_size", 9) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.encoder_layers = getattr(args, "encoder_layers", 6) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 384) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 6) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 384) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 6) + # variance predictor + args.var_pred_n_bins = getattr(args, "var_pred_n_bins", 256) + args.var_pred_hidden_dim = getattr(args, "var_pred_hidden_dim", 256) + args.var_pred_kernel_size = getattr(args, "var_pred_kernel_size", 3) + args.var_pred_dropout = getattr(args, "var_pred_dropout", 0.5) + # postnet + args.add_postnet = getattr(args, "add_postnet", False) + args.postnet_dropout = getattr(args, "postnet_dropout", 0.5) + args.postnet_layers = getattr(args, "postnet_layers", 5) + args.postnet_conv_dim = getattr(args, "postnet_conv_dim", 512) + args.postnet_conv_kernel_size = getattr(args, "postnet_conv_kernel_size", 5) + # pitch & energe + args.use_pitch = getattr(args, "use_pitch", False) + args.use_energe = getattr(args, "use_energe", False) diff --git a/SpeechT5/SpeechLM/speechlm/models/speechlm.py 
b/SpeechT5/SpeechLM/speechlm/models/speechlm.py new file mode 100644 index 0000000000000000000000000000000000000000..038fe83875c04a890e21926477882c9207fa7db1 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/models/speechlm.py @@ -0,0 +1,720 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from fairseq import utils, checkpoint_utils +from fairseq.data.data_utils import compute_mask_indices +from fairseq.data.dictionary import Dictionary +from fairseq.dataclass import ChoiceEnum +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.transformer import Embedding +from fairseq.file_io import PathManager +from torch import Tensor +from fairseq.models.wav2vec.wav2vec2 import ConvFeatureExtractionModel +from fairseq.modules import GradMultiply, LayerNorm +from fairseq.tasks.hubert_pretraining import ( + HubertPretrainingConfig, + HubertPretrainingTask, +) +from fairseq.models.hubert import HubertConfig +from fairseq.models.transformer import TransformerConfig +from speechlm.modules.w2v_encoder import TransformerEncoder +from speechlm.modules.transformer_encoder import TransformerEncoderBase + +logger = logging.getLogger(__name__) + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) + + +@dataclass + +class SpeechlmConfig(HubertConfig): + use_rel_pos_enc: bool = field( + default=False, + metadata={"help": "whether to use relative positional encoding"}, + ) + scaling_for_att: float = field( + default=1.0, + metadata={"help": "scaling for attention weights to prevent overflow issue (for large model)"}, + ) + + # unit encoder-decoder + text_transformer: TransformerConfig = TransformerConfig() + add_unit_encoder: bool = field( + default=False, + metadata={"help": "add unit encoder"}, + ) + add_decoder: bool = field( + default=False, + metadata={"help": "add decoder"}, + ) + add_text_ctc: bool = field( + default=False, + metadata={"help": "add_text_ctc head"}, + ) + text_ctc_conv_kernel: int = field( + default=2, + metadata={"help": "text_ctc_conv kernel size"}, + ) + mask_u2t: bool = field( + default=True, + metadata={"help": "mask the unit input in unit-to-text task"}, + ) + compute_mum: bool = field( + default=False, + metadata={"help": "compute MLM loss in unit-to-text task"}, + ) + + # embedding mixing + mix_with_unit: bool = field( + default=True, + metadata={"help": "mix with the unit embeddings"}, + ) + use_pred_unit: bool = field( + default=False, + metadata={"help": "use the embeddings of predicted units"}, + ) + l2_embedding: bool = field( + default=False, + metadata={"help": "compute l2 loss between unit embedding and unit hidden state"}, + ) + + # Finetune related + encoder_dict_size: int = field( + default=-1, + metadata={"help": "text encoder dictionary dimension"}, + ) + 
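The `scaling_for_att` option above guards against overflow in the attention score matmul for very large models: the custom `MultiheadAttention` later in this diff divides the queries by this factor before `QK^T`, multiplies the logits back afterwards, and (when the factor is greater than 1) subtracts the row-wise max before the softmax. A minimal sketch of that arithmetic, with an illustrative function name and factor, assuming nothing beyond plain PyTorch:

```python
import torch

def scaled_attention_logits(q, k, scaling_for_att=8.0):
    """Overflow-safe attention logits: shrink q before the matmul, rescale after.

    Mathematically a no-op up to rounding, but the intermediate bmm result stays
    small, which is what matters when the matmul runs in fp16.
    """
    q = q / scaling_for_att                       # shrink before q @ k^T
    logits = torch.bmm(q, k.transpose(1, 2))      # small intermediate values
    logits = logits * scaling_for_att             # undo the shrinking
    return logits - logits.max(dim=-1, keepdim=True).values  # softmax-invariant shift

q, k = torch.randn(2, 5, 16), torch.randn(2, 7, 16)
ref = torch.softmax(torch.bmm(q, k.transpose(1, 2)), dim=-1)
out = torch.softmax(scaled_attention_logits(q, k), dim=-1)
print(torch.allclose(ref, out, atol=1e-5))  # True
```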
+ decoder_dict_size: int = field( + default=-1, + metadata={"help": "decoder dictionary dimension"}, + ) + + +@register_model("speechlm", dataclass=SpeechlmConfig) +class SpeechlmModel(BaseFairseqModel): + def __init__( + self, + cfg: SpeechlmConfig, + task_cfg: HubertPretrainingConfig, + dictionaries: List[Dictionary], + unit_dictionary: Dictionary = None, + text_tgt_dictionary: Dictionary = None, + ) -> None: + super().__init__() + logger.info(f"SpeechlmModel Config: {cfg}") + + feature_enc_layers = eval(cfg.conv_feature_layers) # noqa + self.embed = feature_enc_layers[-1][0] + + self.feature_extractor = ConvFeatureExtractionModel( + conv_layers=feature_enc_layers, + dropout=0.0, + mode=cfg.extractor_mode, + conv_bias=cfg.conv_bias, + ) + feature_ds_rate = np.prod([s for _, _, s in feature_enc_layers]) + self.feat2tar_ratio = cfg.label_rate * feature_ds_rate / task_cfg.sample_rate + + self.post_extract_proj = ( + nn.Linear(self.embed, cfg.encoder_embed_dim) + if self.embed != cfg.encoder_embed_dim + else None + ) + + self.mask_prob = cfg.mask_prob + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length = cfg.mask_length + self.no_mask_overlap = cfg.no_mask_overlap + self.mask_min_space = cfg.mask_min_space + + self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + self.logit_temp = cfg.logit_temp + self.skip_masked = cfg.skip_masked + self.skip_nomask = cfg.skip_nomask + + final_dim = cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim + + self.mask_emb = nn.Parameter( + torch.FloatTensor(cfg.encoder_embed_dim).uniform_() + ) + + self.encoder = TransformerEncoder(cfg) + self.layer_norm = LayerNorm(self.embed) + + self.target_glu = None + if cfg.target_glu: + self.target_glu = nn.Sequential( + nn.Linear(final_dim, final_dim * 2), nn.GLU() + ) + + self.final_dim = final_dim + assert len(dictionaries) <= 2, f"Only support <=2 kinds of targets, get {len(dictionaries)} dictionaries" + if len(dictionaries) == 1: + dictionaries = [dictionaries[0], dictionaries[0]] + + self.final_proj_list = nn.ModuleList([ + nn.Linear(cfg.encoder_embed_dim, final_dim) for _ in dictionaries + ]) + + self.num_classes = [len(d) for d in dictionaries] + self.label_embs_list = nn.ParameterList([ + nn.Parameter(torch.FloatTensor(n, final_dim)) for n in self.num_classes + ]) + for i in range(len(self.num_classes)): + nn.init.uniform_(self.label_embs_list[i]) + + ### build unit encoder: + self.mask_u2t = cfg.mask_u2t + self.compute_mum = cfg.compute_mum + self.add_text_ctc = cfg.add_text_ctc + self.text_ctc_conv_kernel = cfg.text_ctc_conv_kernel + self.padding_idx = unit_dictionary.pad() + self.unit_mask_idx = unit_dictionary.index("") + + self.add_unit_encoder = cfg.add_unit_encoder + self.mix_with_unit = cfg.mix_with_unit + self.use_pred_unit = cfg.use_pred_unit + self.l2_embedding = cfg.l2_embedding + if self.add_unit_encoder: + assert len(unit_dictionary) == self.num_classes[0], f"unit_dictionary: {len(unit_dictionary)}, self.num_classes[0]: {self.num_classes[0]}" + ### build unit pre-net, and shared with hubert label_embs if 
needed (default: False) + self.unit_embed_tokens = self.build_embedding( + unit_dictionary, + cfg.text_transformer.encoder.embed_dim, + ) + if self.final_dim == cfg.text_transformer.encoder.embed_dim: + logger.info("Share label_embs[0] with unit_embed_tokens ...") + nn.init.uniform_(self.unit_embed_tokens.weight) + self.label_embs_list[0] = self.unit_embed_tokens.weight + + ### build unit encoder + self.unit_encoder = TransformerEncoderBase( + cfg.text_transformer, + unit_dictionary, + self.unit_embed_tokens, + use_rel_pos_enc=cfg.use_rel_pos_enc, + scaling_for_att=cfg.scaling_for_att, + ) + + ### build text ctc head + if self.add_text_ctc: + conv = nn.Conv1d( + cfg.text_transformer.encoder.embed_dim, cfg.text_transformer.encoder.embed_dim, + self.text_ctc_conv_kernel, + stride=self.text_ctc_conv_kernel // 2, + bias=False, + padding=self.text_ctc_conv_kernel // 2, + ) + nn.init.kaiming_normal_(conv.weight) + self.unit_encoder_ctc_head = nn.Sequential( + Rotate3D(), + conv, + nn.Dropout(p=0.1), + nn.Sequential( + Rotate3D(), + Rotate3D(), + LayerNorm(cfg.text_transformer.encoder.embed_dim), + ), + nn.GELU(), + nn.Linear(cfg.text_transformer.encoder.embed_dim, len(text_tgt_dictionary)), + ) + + ### build unit2text decoder, not available for now + self.add_decoder = cfg.add_decoder + + def build_embedding(self, dictionary, embed_dim): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + return Embedding(num_embeddings, embed_dim, padding_idx) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: SpeechlmConfig, task: HubertPretrainingTask): + """Build a new model instance.""" + unit_dictionary = getattr(task, "text_src_dictionary", None) + text_tgt_dictionary = getattr(task, "text_dictionary", None) + model = SpeechlmModel(cfg, task.cfg, task.dictionaries, unit_dictionary, text_tgt_dictionary) + return model + + def apply_mask(self, x, padding_mask, target_list): + B, T, C = x.shape + if self.mask_prob > 0: + mask_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob, + self.mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x[mask_indices] = self.mask_emb + else: + mask_indices = None + + if self.mask_channel_prob > 0: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x[mask_channel_indices] = 0 + + return x, mask_indices + + def forward_features(self, source: torch.Tensor) -> torch.Tensor: + if self.feature_grad_mult > 0: + features = self.feature_extractor(source) + if self.feature_grad_mult != 1.0: + features = GradMultiply.apply(features, self.feature_grad_mult) + else: + with torch.no_grad(): + features = self.feature_extractor(source) + return features + + def forward_targets( + self, + features: torch.Tensor, + target_list: List[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Trim features to ensure labels exist and then get aligned labels + feat_tsz = 
features.size(2) + targ_tsz = min([t.size(1) for t in target_list]) + if self.feat2tar_ratio * feat_tsz > targ_tsz: + feat_tsz = int(targ_tsz / self.feat2tar_ratio) + features = features[..., :feat_tsz] + target_inds = torch.arange(feat_tsz).float() * self.feat2tar_ratio + target_inds += np.random.choice(int(self.feat2tar_ratio)) + target_list = [t[:, target_inds.long()] for t in target_list] + return features, target_list + + def forward_padding_mask( + self, + features: torch.Tensor, + padding_mask: torch.Tensor, + ) -> torch.Tensor: + extra = padding_mask.size(1) % features.size(1) + if extra > 0: + padding_mask = padding_mask[:, :-extra] + padding_mask = padding_mask.view(padding_mask.size(0), features.size(1), -1) + padding_mask = padding_mask.all(-1) + return padding_mask + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + lprobs = self.get_normalized_probs_scriptable(net_output, log_probs, sample) + lprobs.batch_first = True + return lprobs + + def downsample_ctc_padding_mask(self, padding_mask): + """ + padding_mask: (B, T) + """ + stride = self.text_ctc_conv_kernel // 2 + return padding_mask[:, ::stride] + + def compute_pred(self, proj_x, label_embs): + if self.target_glu: + label_embs = self.target_glu(label_embs) + x = F.normalize(proj_x.float(), dim=-1) # (S, D) + label_embs = F.normalize(label_embs.float(), dim=-1) # (C, D) + logits = torch.matmul(x, label_embs.T).type_as(proj_x) # (S, C) + logits /= self.logit_temp + return logits + + def compute_hubert_logits(self, x, target, proj, label_embs, padding_mask, mask_indices): + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + proj_x_m = proj(x[masked_indices]) + logit_m_list = [(self.compute_pred(proj_x_m, label_embs), target[masked_indices])] + else: + logit_m_list = [None] + + if not self.skip_nomask: + nomask_indices = torch.logical_and(~padding_mask, ~mask_indices) + proj_x_u = proj(x[nomask_indices]) + logit_u_list = [(self.compute_pred(proj_x_u, label_embs), target[nomask_indices])] + else: + logit_u_list = [None] + + return logit_m_list, logit_u_list + + def convert_embeddings(self, + x, + padding_mask, + target=None, + mask_indices=None, + mix_with_unit=False, + use_pred_unit=False, + l2_embedding=False, + remask=False + ): + """ + 1. Mix with units if needed (default: True) + 2. 
Prepare for unit_encoder inputs + Inputs: + x, (B, T, D) + Return: + src_tokens, (B, T) + soft_embeddings, (B, T, D) + l2_loss, a loss + """ + soft_embeddings = self.final_proj_list[0](x) if x.size(-1) == self.final_dim else x + if padding_mask is None: + padding_mask = soft_embeddings.new_zeros(soft_embeddings.size(0), soft_embeddings.size(1), dtype=torch.long) + if use_pred_unit: + src_tokens = self.compute_pred(self.final_proj_list[0](x), self.label_embs_list[0]).argmax(dim=-1) + src_tokens[padding_mask] = self.padding_idx + elif target is not None: + src_tokens = target + else: + src_tokens = padding_mask.long() + + if l2_embedding | mix_with_unit: + unit_embeddings = self.unit_embed_tokens(src_tokens) # (B, T, D) + + l2_loss = 0 + if l2_embedding: + if mask_indices is not None: + l2_loss = (soft_embeddings - unit_embeddings)[mask_indices].float().pow(2).mean(dim=-1) + scale = unit_embeddings[mask_indices].float().pow(2).sum(dim=-1) + else: + l2_loss = (soft_embeddings - unit_embeddings).float().pow(2).mean(dim=-1) + scale = unit_embeddings.float().pow(2).sum(dim=-1) + l2_loss = (l2_loss / scale).mean() + + if mix_with_unit: + B, T, D = x.shape + selected_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob / 2, + self.mask_length // 2, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + selected_indices = torch.from_numpy(selected_indices).to(x.device) + if mask_indices is not None: + if remask: + remask_indices = torch.logical_and(selected_indices, mask_indices) + soft_embeddings[remask_indices] = self.mask_emb + swap_indices = torch.logical_and(selected_indices, ~mask_indices) + else: + swap_indices = selected_indices + soft_embeddings[swap_indices] = unit_embeddings[swap_indices] + + soft_embeddings = soft_embeddings * (1 - padding_mask.unsqueeze(-1).type_as(x)) + return src_tokens, soft_embeddings, l2_loss + + def forward( + self, + source: torch.Tensor = None, + src_tokens: torch.Tensor = None, + src_lengths: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + assert source is not None or src_tokens is not None + if source is not None: + return self.forward_speech( + source=source, + target_list=target_list, + padding_mask=padding_mask, + mask=mask, + features_only=features_only, + output_layer=output_layer, + ) + else: + return self.forward_text( + src_tokens=src_tokens, + src_lengths=src_lengths, + mask=self.mask_u2t, + output_layer=output_layer, + ) + + def forward_speech( + self, + source: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + """output layer is 1-based""" + features = self.forward_features(source) + if target_list is not None: + features, target_list = self.forward_targets(features, target_list) + + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) + features = self.layer_norm(features) + unmasked_features = features.clone() + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = 
self.dropout_input(features) + unmasked_features = self.dropout_features(unmasked_features) + + if mask: + x, mask_indices = self.apply_mask(features, padding_mask, target_list) + else: + x = features + mask_indices = None + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, _ = self.encoder( + x, + padding_mask=padding_mask, + layer=None if output_layer is None else output_layer - 1, + ) + + if features_only: + return {"x": x, "padding_mask": padding_mask, "features": features} + + logit_m_list, logit_u_list = self.compute_hubert_logits( + x, + target_list[0], + self.final_proj_list[0], + self.label_embs_list[0], + padding_mask, + mask_indices, + ) + + result = { + "logit_m_list": logit_m_list, + "logit_u_list": logit_u_list, + "padding_mask": padding_mask, + "features_pen": features_pen, + } + + if self.add_unit_encoder: + src_tokens, x_emb, l2_loss = self.convert_embeddings( + x, + padding_mask, target_list[0], + mask_indices=mask_indices, + mix_with_unit=self.mix_with_unit, + use_pred_unit=self.use_pred_unit, + l2_embedding=self.l2_embedding, + ) + encoder_out = self.unit_encoder(src_tokens, token_embeddings=x_emb) + + result['encoder_out'] = encoder_out['encoder_out'] # [(T, B, D)] + result['encoder_padding_mask'] = encoder_out['encoder_padding_mask'] # [(B, T)] + if self.l2_embedding: + result['embedding_l2_loss'] = l2_loss + + code_logit_m_list, code_logit_u_list = self.compute_hubert_logits( + encoder_out['encoder_out'][0].transpose(0, 1), + target_list[-1], + self.final_proj_list[-1], + self.label_embs_list[-1], + padding_mask, + mask_indices, + ) + result['logit_m_list'] += code_logit_m_list + result['logit_u_list'] += code_logit_u_list + return result + + def forward_text( + self, + src_tokens: torch.Tensor = None, + src_lengths: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + mask: bool = True, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + assert self.add_unit_encoder, f"Can not forward unit-text branch without unit_encoder!" 
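For intuition, the embedding mixing that `convert_embeddings` performs above when `mix_with_unit` is enabled (swapping a random subset of frame representations for the embeddings of their aligned units) can be sketched in isolation. This simplified version uses a plain Bernoulli mask where the real code draws spans via fairseq's `compute_mask_indices` with half the mask probability and half the span length; the function name and swap probability here are illustrative:

```python
import torch
import torch.nn as nn

def mix_with_unit_embeddings(x, units, unit_embed, padding_mask, swap_prob=0.4):
    """Replace a random subset of frames with their aligned unit embeddings.

    x:            (B, T, D) hidden states from the speech encoder
    units:        (B, T) aligned unit ids (the HuBERT-style targets)
    unit_embed:   nn.Embedding over the unit vocabulary
    padding_mask: (B, T) bool, True at padded positions
    """
    unit_emb = unit_embed(units)                                          # (B, T, D)
    swap = (torch.rand(x.shape[:2], device=x.device) < swap_prob) & ~padding_mask
    mixed = torch.where(swap.unsqueeze(-1), unit_emb, x)
    return mixed * (~padding_mask).unsqueeze(-1).type_as(x)               # zero padded frames

B, T, D, V = 2, 5, 8, 10
embed = nn.Embedding(V, D, padding_idx=0)
out = mix_with_unit_embeddings(torch.randn(B, T, D), torch.randint(1, V, (B, T)),
                               embed, torch.zeros(B, T, dtype=torch.bool))
print(out.shape)  # torch.Size([2, 5, 8])
```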
+ + padding_mask = src_tokens == self.padding_idx + unit_embeddings = self.unit_embed_tokens(src_tokens) + if mask: + unit_embeddings, mask_indices = self.apply_mask(unit_embeddings, padding_mask, [src_tokens]) + else: + ### If already applied mask on src_tokens, then the target_list should contains many padding_idx + mask_indices = target_list[-1] != self.padding_idx + unit_embeddings[mask_indices] = self.mask_emb + + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=unit_embeddings, + return_all_hiddens=output_layer is not None, + ) + + result = {} + result["encoder_out"] = encoder_out["encoder_out"] + result["encoder_states"] = encoder_out["encoder_states"] + result["padding_mask"] = padding_mask + + if self.compute_mum: + code_logit_m_list, code_logit_u_list = self.compute_hubert_logits( + encoder_out["encoder_out"].transpose(0, 1), + target_list[-1], + self.final_proj_list[-1], + self.label_embs_list[-1], + padding_mask, + mask_indices, + ) + result["logit_m_list"] = code_logit_m_list + result["logit_u_list"] = code_logit_u_list + + if self.add_text_ctc: + result["encoder_out_ctc"] = [self.unit_encoder_ctc_head(x) for x in encoder_out['encoder_out']] + result["encoder_padding_mask"] = [ + self.downsample_ctc_padding_mask(padding_mask) for padding_mask in encoder_out['encoder_padding_mask'] + ] + return result + + def extract_features( + self, + source: torch.Tensor, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = False, + ret_conv: bool = False, + output_layer: Optional[int] = None, + **kwargs, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Extract features for only speech input""" + res = self.forward( + source, + padding_mask=padding_mask, + mask=mask, + features_only=True, + output_layer=output_layer, + ) + x = res["x"] # B x T x D + padding_mask = res["padding_mask"] + + if self.add_unit_encoder: + src_tokens, x, _ = self.convert_embeddings( + x, + padding_mask, + mix_with_unit=False, + use_pred_unit=False, + ) + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=x, + return_all_hiddens=output_layer is not None + ) + res["x"] = encoder_out['encoder_out'][0].transpose(0, 1) # (B, T, D) + + feature = res["features"] if ret_conv else res["x"] + if output_layer is not None: + feature = encoder_out['encoder_states'] + + return feature, padding_mask + + def get_logits(self, net_output, is_masked=True): + if is_masked: + logits_list = net_output["logit_m_list"] + else: + logits_list = net_output["logit_u_list"] + logits_list = [x[0].float() for x in logits_list if x is not None] + return logits_list + + def get_targets(self, net_output, is_masked=True): + if is_masked: + logits_list = net_output["logit_m_list"] + else: + logits_list = net_output["logit_u_list"] + targets_list = [x[1].long() for x in logits_list if x is not None] + return targets_list + + def get_extra_losses(self, net_output): + extra_losses = [] + names = [] + + if "features_pen" in net_output: + extra_losses.append(net_output["features_pen"]) + names.append("features_pen") + + if "embedding_l2_loss" in net_output: + extra_losses.append(net_output["embedding_l2_loss"]) + names.append("embedding_l2_loss") + + return extra_losses, names + + def remove_pretraining_modules(self, step2=False): + self.target_glu = None + + def load_checkpoint(self, checkpoint: str): + if not PathManager.exists(checkpoint): + raise IOError("Model file not found: {}".format(checkpoint)) + state = checkpoint_utils.load_checkpoint_to_cpu(checkpoint) + return state + +class Rotate3D(nn.Module): + """ + 
(T, B, D) --> (B, D, T) --> (D, T, B) --> (T, B, D) + """ + def __init__(self): + super().__init__() + + def forward(self, x): + return x.permute(1, 2, 0) diff --git a/SpeechT5/SpeechLM/speechlm/models/speechlm_ctcasr.py b/SpeechT5/SpeechLM/speechlm/models/speechlm_ctcasr.py new file mode 100644 index 0000000000000000000000000000000000000000..642a51d83de671f067417202a74af29d1466653c --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/models/speechlm_ctcasr.py @@ -0,0 +1,56 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +from dataclasses import dataclass +from fairseq.models import BaseFairseqModel, register_model +from fairseq.tasks import FairseqTask + +from fairseq.models.hubert import HubertAsrConfig, HubertCtc, HubertEncoder + +@dataclass +class SpeechLMCtcConfig(HubertAsrConfig): + pass + + +@register_model("speechlm_ctc", dataclass=SpeechLMCtcConfig) +class SpeechLMCtc(HubertCtc): + def __init__(self, cfg: SpeechLMCtcConfig, w2v_encoder: BaseFairseqModel): + super().__init__(cfg, w2v_encoder) + + @classmethod + def build_model(cls, cfg: SpeechLMCtcConfig, task: FairseqTask): + """Build a new model instance.""" + w2v_encoder = SpeechLMEncoder(cfg, task) + return cls(cfg, w2v_encoder) + + +class SpeechLMEncoder(HubertEncoder): + def __init__(self, cfg: HubertAsrConfig, task): + super().__init__(cfg, task) + + if (task.target_dictionary is not None) and ( + hasattr(self.w2v_model, "unit_encoder_ctc_head") + ): + self.proj = self.w2v_model.unit_encoder_ctc_head + self.conv_ctc_proj = True + else: + self.conv_ctc_proj = False + + def forward(self, source, padding_mask, tbc=True, **kwargs): + results = super().forward( + source, + padding_mask, + tbc, + **kwargs, + ) + if self.conv_ctc_proj: + padding_mask = self.w2v_model.downsample_ctc_padding_mask(results["padding_mask"]) + results["encoder_padding_mask"] = padding_mask + results["padding_mask"] = padding_mask + return results diff --git a/SpeechT5/SpeechLM/speechlm/models/speechlm_st.py b/SpeechT5/SpeechLM/speechlm/models/speechlm_st.py new file mode 100644 index 0000000000000000000000000000000000000000..6f70c549f9200c043149acedbb09385c2aaca4d3 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/models/speechlm_st.py @@ -0,0 +1,268 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import contextlib +import torch +import torch.nn as nn +from argparse import Namespace +from dataclasses import dataclass, field +from typing import Any +from fairseq import checkpoint_utils, tasks, utils +from fairseq.models import FairseqEncoderDecoderModel, register_model 
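The `Rotate3D` module above exists so that `unit_encoder_ctc_head` can run a `Conv1d` over the time axis of the time-first `(T, B, D)` encoder output and rotate back before the LayerNorm and vocabulary projection; `SpeechLMEncoder` in `speechlm_ctcasr.py` accordingly subsamples the padding mask with the same stride. A shape-level sketch with toy dimensions (the real head uses the model's `embed_dim`, `text_ctc_conv_kernel`, target dictionary size, and fairseq's LayerNorm):

```python
import torch
import torch.nn as nn

class Rotate3D(nn.Module):
    """Cyclic permutation: (T, B, D) -> (B, D, T)."""
    def forward(self, x):
        return x.permute(1, 2, 0)

embed_dim, vocab, kernel = 16, 50, 4
stride = kernel // 2

ctc_head = nn.Sequential(
    Rotate3D(),                                   # (T, B, D) -> (B, D, T) for Conv1d
    nn.Conv1d(embed_dim, embed_dim, kernel, stride=stride, bias=False, padding=stride),
    nn.Dropout(p=0.1),
    nn.Sequential(Rotate3D(), Rotate3D(), nn.LayerNorm(embed_dim)),  # back to (T', B, D)
    nn.GELU(),
    nn.Linear(embed_dim, vocab),
)

x = torch.randn(20, 3, embed_dim)    # (T=20, B=3, D) encoder output, time-first
print(ctc_head(x).shape)             # roughly T // stride frames: torch.Size([11, 3, 50])
```

The matching mask subsampling is simply `padding_mask[:, ::stride]`, which is what `downsample_ctc_padding_mask` returns.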
+from fairseq.models.fairseq_decoder import FairseqDecoder +from fairseq.models.fairseq_encoder import FairseqEncoder +from fairseq.tasks import FairseqTask +from fairseq.dataclass import ChoiceEnum +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.data.data_utils import lengths_to_padding_mask + +from fairseq.models.hubert import HubertAsrConfig +from speechlm.modules.transformer_decoder import TransformerDecoderScriptable + +@dataclass +class SpeechLMS2TConfig(HubertAsrConfig): + activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field( + default="gelu", metadata={"help": "activation function to use"} + ) + use_rel_pos_enc: bool = field( + default=True, + metadata={"help": "whether to use relative positional encoding for decoder"}, + ) + encoder_embed_dim: int = field( + default=768, metadata={"help": "encoder embedding dimension, used for enc-dec att"} + ) + decoder_embed_dim: int = field( + default=768, metadata={"help": "decoder embedding dimension"} + ) + decoder_output_dim: int = field( + default=768, metadata={"help": "decoder output dimension"} + ) + decoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "decoder embedding dimension for FFN"} + ) + decoder_layers: int = field(default=6, metadata={"help": "num of decoder layers"}) + decoder_layerdrop: float = field( + default=0.0, metadata={"help": "decoder layerdrop chance"} + ) + decoder_attention_heads: int = field( + default=12, metadata={"help": "num decoder attention heads"} + ) + decoder_learned_pos: bool = field( + default=False, + metadata={"help": "use learned positional embeddings in the decoder"}, + ) + decoder_normalize_before: bool = field( + default=False, metadata={"help": "apply layernorm before each decoder block"} + ) + no_token_positional_embeddings: bool = field( + default=False, + metadata={ + "help": "if set, disables positional embeddings (outside self attention)" + }, + ) + decoder_dropout: float = field( + default=0.0, metadata={"help": "dropout probability in the decoder"} + ) + decoder_attention_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability for attention weights inside the decoder" + }, + ) + decoder_activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN inside the decoder" + }, + ) + share_decoder_input_output_embed: bool = field( + default=False, metadata={"help": "share decoder input and output embeddings"} + ) + ### the following config is only for the compatibility to fairseq speech_to_text task + input_feat_per_channel: Any = None + input_channels: Any = None + speaker_to_id: Any = None + +@register_model("speechlm_st_legacy", dataclass=SpeechLMS2TConfig) +class SpeechLMS2T(FairseqEncoderDecoderModel): + def __init__(self, cfg: SpeechLMS2TConfig, encoder: FairseqEncoder, decoder: FairseqDecoder): + super().__init__(encoder, decoder) + self.cfg = cfg + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: SpeechLMS2TConfig, task: FairseqTask): + """Build a new model instance.""" + def build_embedding(dictionary, embed_dim): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + return Embedding(num_embeddings, embed_dim, padding_idx) + + src_dict, tgt_dict = task.source_dictionary, task.target_dictionary + encoder = SpeechLMEncoder(cfg, task) + assert cfg.encoder_embed_dim == 
encoder.w2v_model.encoder.embedding_dim + decoder_embed_tokens = build_embedding(tgt_dict, cfg.decoder_embed_dim) + decoder = TransformerDecoderScriptable(cfg, tgt_dict, decoder_embed_tokens) + return cls(cfg, encoder, decoder) + + +class SpeechLMEncoder(FairseqEncoder): + """ + Modified from fairseq.models.hubert.hubert_asr.HubertEncoder + 1. make it compatible with fairseq speech_to_text task + 2. make it compatible with encoder-decoder model + """ + def __init__(self, cfg: HubertAsrConfig, task): + self.apply_mask = cfg.apply_mask + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + } + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + assert task.data_cfg.standardize_audio() == w2v_args.task.normalize, ( + "Fine-tuning works best when data normalization is the same. 
" + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + w2v_args.task.data = cfg.data + pretrain_task = tasks.setup_task(w2v_args.task) + if state is not None and "task_state" in state: + # This will load the stored "dictionaries" object + pretrain_task.load_state_dict(state["task_state"]) + else: + pretrain_task.load_state_dict(task.state_dict()) + + model = pretrain_task.build_model(w2v_args.model, from_checkpoint=True) + if state is not None and not cfg.no_pretrained_weights: + # set strict=False because we omit some modules + model.load_state_dict(state["model"], strict=False) + + model.remove_pretraining_modules() + + super().__init__(pretrain_task.source_dictionary) + + d = w2v_args.model.encoder_embed_dim + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.num_updates = 0 + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, src_tokens=None, src_lengths=None, **kwargs): + + w2v_args = { + "source": src_tokens, + "padding_mask": lengths_to_padding_mask(src_lengths), + "mask": self.apply_mask and self.training, + } + + ft = self.freeze_finetune_updates <= self.num_updates + + with torch.no_grad() if not ft else contextlib.ExitStack(): + x, padding_mask = self.w2v_model.extract_features(**w2v_args) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + x = self.final_dropout(x) + + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [padding_mask], # B x T + "padding_mask": [padding_mask], + } + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. + + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. 
+ """ + _net_input = { + "source": net_input["src_tokens"], + "padding_mask": lengths_to_padding_mask(net_input["src_lengths"]), + "mask": False, + } + + x, padding_mask = self.w2v_model.extract_features(**_net_input) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + encoder_out = { + "encoder_out" : [x], + "encoder_padding_mask" : [padding_mask], + } + return encoder_out + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + encoder_out["encoder_out"] = [ + x.index_select(1, new_order) for x in encoder_out["encoder_out"] + ] + if encoder_out["encoder_padding_mask"] is not None: + encoder_out["encoder_padding_mask"] = [ + x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"] + ] + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git a/SpeechT5/SpeechLM/speechlm/modules/__init__.py b/SpeechT5/SpeechLM/speechlm/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc082498ee85c984cbbf12776ced6f16e6a0bbf --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/modules/__init__.py @@ -0,0 +1,23 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +from .multihead_attention import MultiheadAttention +from .relative_pos_enc import RelativePositionalEncoding +from .transformer_layer import TransformerEncoderLayerBase, TransformerDecoderLayerBase +from .w2v_encoder import TransformerEncoder, TransformerSentenceEncoderLayer +from .learned_positional_embedding import LearnedPositionalEmbedding + +__all__ = [ + "MultiheadAttention", + "RelativePositionalEncoding", + "TransformerEncoderLayerBase", + "TransformerDecoderLayerBase", + "TransformerEncoder", + "TransformerSentenceEncoderLayer" +] diff --git a/SpeechT5/SpeechLM/speechlm/modules/learned_positional_embedding.py b/SpeechT5/SpeechLM/speechlm/modules/learned_positional_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..9a6d55a37d456715e50da0d23b48005af1aec248 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/modules/learned_positional_embedding.py @@ -0,0 +1,68 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/modules/learned_positional_embedding.py + 1. 
Add clamping if the input length exceeds the max-source-tokens +""" + +from typing import Dict, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from torch import Tensor + + +class LearnedPositionalEmbedding(nn.Embedding): + """ + This module learns positional embeddings up to a fixed maximum size. + Padding ids are ignored by either offsetting based on padding_idx + or by setting padding_idx to None and ensuring that the appropriate + position ids are passed to the forward function. + """ + + def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: int): + super().__init__(num_embeddings, embedding_dim, padding_idx) + self.onnx_trace = False + if self.padding_idx is not None: + self.max_positions = self.num_embeddings - self.padding_idx - 1 + else: + self.max_positions = self.num_embeddings + + def forward( + self, + input: Tensor, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + positions: Optional[Tensor] = None, + ): + """Input is expected to be of size [bsz x seqlen].""" + assert (positions is None) or ( + self.padding_idx is None + ), "If positions is pre-computed then padding_idx should not be set." + + if positions is None: + if incremental_state is not None: + # positions is the same for every token when decoding a single step + # Without the int() cast, it doesn't work in some cases when exporting to ONNX + positions = torch.zeros( + (1, 1), device=input.device, dtype=input.dtype + ).fill_(int(self.padding_idx + input.size(1))) + else: + positions = utils.make_positions( + input, self.padding_idx, onnx_trace=self.onnx_trace + ) + positions = torch.clamp(positions, max=self.padding_idx + self.max_positions) + return F.embedding( + positions, + self.weight, + self.padding_idx, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.sparse, + ) diff --git a/SpeechT5/SpeechLM/speechlm/modules/multihead_attention.py b/SpeechT5/SpeechLM/speechlm/modules/multihead_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..a6ac408c623bea27aef3e77db30d91ad6fb904bc --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/modules/multihead_attention.py @@ -0,0 +1,348 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +from typing import Dict, Optional, Tuple + +import torch +import torch.nn.functional as F +from fairseq import utils +from torch import Tensor + +from fairseq.modules import MultiheadAttention as FairseqMultiheadAttention + + +class MultiheadAttention(FairseqMultiheadAttention): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. 
+ """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + q_noise=0.0, + qn_block_size=8, + scaling_for_att=1.0 + ): + super().__init__( + embed_dim, + num_heads, + kdim, + vdim, + dropout, + bias, + add_bias_kv, + add_zero_attn, + self_attention, + encoder_decoder_attention, + q_noise, + qn_block_size, + ) + self.scaling_for_att = scaling_for_att + + def forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + need_weights: bool = True, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + before_softmax: bool = False, + need_head_weights: bool = False, + position_bias: Optional[Tensor] = None, + ) -> Tuple[Tensor, Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. + need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). + before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. + """ + if need_head_weights: + need_weights = True + + is_tpu = query.device.type == "xla" + + tgt_len, bsz, embed_dim = query.size() + src_len = tgt_len + assert embed_dim == self.embed_dim, f"query dim {embed_dim} != {self.embed_dim}" + assert list(query.size()) == [tgt_len, bsz, embed_dim] + if key is not None: + src_len, key_bsz, _ = key.size() + if not torch.jit.is_scripting(): + assert key_bsz == bsz + assert value is not None + assert src_len, bsz == value.shape[:2] + + if ( + not self.onnx_trace + and not is_tpu # don't use PyTorch version on TPUs + and incremental_state is None + and not static_kv + # A workaround for quantization to work. Otherwise JIT compilation + # treats bias in linear module as method. 
+ and not torch.jit.is_scripting() + and position_bias is None + ): + assert key is not None and value is not None + return F.multi_head_attention_forward( + query, + key, + value, + self.embed_dim, + self.num_heads, + torch.empty([0]), + torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), + self.bias_k, + self.bias_v, + self.add_zero_attn, + self.dropout_module.p, + self.out_proj.weight, + self.out_proj.bias, + self.training or self.dropout_module.apply_during_inference, + key_padding_mask, + need_weights, + attn_mask, + use_separate_proj_weight=True, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + ) + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + q *= (1 / self.scaling_for_att) + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + key_padding_mask.new_zeros(key_padding_mask.size(0), 1), + ], + dim=1, + ) + + q = ( + q.contiguous() + .view(tgt_len, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if k is not None: + k = ( + k.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if v is not None: + v = ( + v.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + assert k is not None + k = torch.cat([prev_key, k], dim=1) + src_len = k.size(1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + + saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) + 
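A few lines below, the `position_bias` branch adds a relative-position term to the attention logits, computed from the `(T, T, head_dim)` embeddings that `RelativePositionalEncoding` (defined later in this diff) produces for clamped offsets `i - j`. A minimal sketch of that bias under those shape assumptions, with an illustrative helper name:

```python
import torch
import torch.nn as nn

def relative_position_bias(q, pos_emb, num_heads):
    """Additive relative-position bias for the attention logits.

    q:        (T, B, H * Dh) queries, time-first as in the module above
    pos_emb:  (T, T, Dh) embeddings of the relative offsets i - j
    returns:  (B * H, T, T) bias that is added to q @ k^T
    """
    T, B, _ = q.shape
    Dh = pos_emb.size(-1)
    reshape_q = q.contiguous().view(T, B * num_heads, Dh)        # per-head query vectors
    bias = torch.matmul(reshape_q, pos_emb.transpose(-2, -1))    # (T, B*H, T)
    return bias.transpose(0, 1)                                  # (B*H, T, T)

T, B, H, Dh, maxlen = 6, 2, 4, 8, 16
pos_seq = torch.arange(T).unsqueeze(1) - torch.arange(T).unsqueeze(0)  # offsets i - j
pos_emb = nn.Embedding(2 * maxlen, Dh)(pos_seq + maxlen)               # toy stand-in for pe_k
print(relative_position_bias(torch.randn(T, B, H * Dh), pos_emb, H).shape)  # torch.Size([8, 6, 6])
```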
saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + assert k.size(1) == src_len + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. + if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + assert v is not None + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + torch.zeros(key_padding_mask.size(0), 1).type_as( + key_padding_mask + ), + ], + dim=1, + ) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + if position_bias is not None: ## first order + ## position_bias: [241, 241, 64] + #print ("attn_weights: ", attn_weights.size()) # [492, 241, 241] + reshape_q = q.contiguous().view(bsz * self.num_heads, -1, self.head_dim).transpose(0,1) #[241, 492, 64] + #print ("reshape_q: ", reshape_q.size()) + B = torch.matmul(reshape_q, position_bias.transpose(-2, -1)) + #print ("B: ", B.size()) ## [241, 492, 241] + #B = B.transpose(0, 1).view(bsz, self.num_heads, position_bias.size(0), position_bias.size(1)) + B = B.transpose(0, 1).view(bsz*self.num_heads, position_bias.size(0), position_bias.size(1)) + #print ("B 2: ", B.size()) + attn_weights += B + + attn_weights *= self.scaling_for_att + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if not is_tpu: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if self.scaling_for_att > 1.0: + attn_weights = attn_weights - attn_weights.detach().max(dim=-1, keepdim=True)[0] + + if before_softmax: + return attn_weights, v + + attn_weights_float = utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace + ) + attn_weights = attn_weights_float.type_as(attn_weights) + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + if self.onnx_trace and attn.size(1) == 1: + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, embed_dim) + else: + attn 
= attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn = self.out_proj(attn) + attn_weights: Optional[Tensor] = None + if need_weights: + attn_weights = attn_weights_float.view( + bsz, self.num_heads, tgt_len, src_len + ).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + + return attn, attn_weights diff --git a/SpeechT5/SpeechLM/speechlm/modules/relative_pos_enc.py b/SpeechT5/SpeechLM/speechlm/modules/relative_pos_enc.py new file mode 100644 index 0000000000000000000000000000000000000000..2a073ebf2893e9e9b092aa520bdaf927e9388c2b --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/modules/relative_pos_enc.py @@ -0,0 +1,35 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import torch + +class RelativePositionalEncoding(torch.nn.Module): + def __init__(self, d_model, maxlen=1000, embed_v=False): + super(RelativePositionalEncoding, self).__init__() + + self.d_model = d_model + self.maxlen = maxlen + self.pe_k = torch.nn.Embedding(2*maxlen, d_model) + if embed_v: + self.pe_v = torch.nn.Embedding(2*maxlen, d_model) + self.embed_v = embed_v + + + def forward(self, pos_seq, incremental_state=None): + pos_seq[pos_seq < -self.maxlen] = -self.maxlen + pos_seq[pos_seq >= self.maxlen] = self.maxlen - 1 + pos_seq = pos_seq + self.maxlen + + if incremental_state is not None: + pos_seq = pos_seq[-1:] + + if self.embed_v: + return self.pe_k(pos_seq), self.pe_v(pos_seq) + else: + return self.pe_k(pos_seq), None diff --git a/SpeechT5/SpeechLM/speechlm/modules/transformer_decoder.py b/SpeechT5/SpeechLM/speechlm/modules/transformer_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..83e91fab234aaa9dfa00ed9f3fab63b98f97375a --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/modules/transformer_decoder.py @@ -0,0 +1,544 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/transformer/transformer_decoder.py +""" + +import math +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqIncrementalDecoder +from fairseq.models.transformer import TransformerConfig +from fairseq.modules import ( + AdaptiveSoftmax, + BaseLayer, + FairseqDropout, + LayerDropModuleList, + LayerNorm, + PositionalEmbedding, + SinusoidalPositionalEmbedding, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor + +from speechlm.modules 
import transformer_layer +from speechlm.modules.relative_pos_enc import RelativePositionalEncoding + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerDecoderBase": + return "TransformerDecoder" + else: + return module_name + + +class TransformerDecoderBase(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *cfg.decoder.layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, + cfg, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + use_rel_pos_enc=False, + ): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + self._future_mask = torch.empty(0) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.decoder_layerdrop = cfg.decoder.layerdrop + self.share_input_output_embed = cfg.share_decoder_input_output_embed + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = cfg.decoder.embed_dim + self.embed_dim = embed_dim + self.output_embed_dim = cfg.decoder.output_dim + + self.padding_idx = embed_tokens.padding_idx + self.max_target_positions = cfg.max_target_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + self.embed_positions = ( + PositionalEmbedding( + self.max_target_positions, + embed_dim, + self.padding_idx, + learned=cfg.decoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + self.cross_self_attention = cfg.cross_self_attention + + if self.decoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.use_rel_pos_enc = use_rel_pos_enc + self.layers.extend( + [ + self.build_decoder_layer(cfg, no_encoder_attn) + for _ in range(cfg.decoder.layers) + ] + ) + self.num_layers = len(self.layers) + + if cfg.decoder.normalize_before and not cfg.no_decoder_final_norm: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + + self.project_out_dim = ( + Linear(embed_dim, self.output_embed_dim, bias=False) + if embed_dim != self.output_embed_dim and not cfg.tie_adaptive_weights + else None + ) + + self.adaptive_softmax = None + self.output_projection = output_projection + if self.output_projection is None: + self.build_output_projection(cfg, dictionary, embed_tokens) + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(embed_dim // cfg.decoder.attention_heads, 24) + + def build_output_projection(self, cfg, dictionary, embed_tokens): + if cfg.adaptive_softmax_cutoff is 
not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + utils.eval_str_list(cfg.adaptive_softmax_cutoff, type=int), + dropout=cfg.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if cfg.tie_adaptive_weights else None, + factor=cfg.adaptive_softmax_factor, + tie_proj=cfg.tie_adaptive_proj, + ) + elif self.share_input_output_embed: + self.output_projection = nn.Linear( + self.embed_tokens.weight.shape[1], + self.embed_tokens.weight.shape[0], + bias=False, + ) + self.output_projection.weight = self.embed_tokens.weight + else: + self.output_projection = nn.Linear( + self.output_embed_dim, len(dictionary), bias=False + ) + nn.init.normal_( + self.output_projection.weight, mean=0, std=self.output_embed_dim ** -0.5 + ) + num_base_layers = cfg.base_layers + for i in range(num_base_layers): + self.layers.insert( + ((i + 1) * cfg.decoder.layers) // (num_base_layers + 1), + BaseLayer(cfg), + ) + + def build_decoder_layer(self, cfg, no_encoder_attn=False): + layer = transformer_layer.TransformerDecoderLayerBase(cfg, no_encoder_attn, has_relative_attention_bias=self.use_rel_pos_enc) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention, should be of size T x B x C + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). 
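When `share_decoder_input_output_embed` is set, the branch above reuses the token-embedding matrix as the output projection, so logits are computed against the same weights and no extra parameters are added. A minimal sketch of that tying with made-up dimensions:

```python
import torch.nn as nn

# Illustrative only: tie the output projection to the input embedding, as in
# the `share_input_output_embed` branch above.
vocab, dim = 1000, 768
embed_tokens = nn.Embedding(vocab, dim, padding_idx=1)
output_projection = nn.Linear(dim, vocab, bias=False)
output_projection.weight = embed_tokens.weight   # shared storage, no new params
assert output_projection.weight.data_ptr() == embed_tokens.weight.data_ptr()
```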
+ + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + ) + + if not features_only: + x = self.output_layer(x) + return x, extra + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + return self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + + """ + A scriptable subclass of this class has an extract_features method and calls + super().extract_features, but super() is not supported in torchscript. A copy of + this function is made to be used in the subclass instead. + """ + + def extract_features_scriptable( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + """ + Similar to *forward* but only return features. + + Includes several features from "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + alignment_layer (int, optional): return mean alignment over + heads at this layer (default: last layer). + alignment_heads (int, optional): only average alignment over + this many heads (default: all heads). 
+ + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + bs, slen = prev_output_tokens.size() + if alignment_layer is None: + alignment_layer = self.num_layers - 1 + + enc: Optional[Tensor] = None + padding_mask: Optional[Tensor] = None + if encoder_out is not None and len(encoder_out["encoder_out"]) > 0: + enc = encoder_out["encoder_out"][0] + assert ( + enc.size()[1] == bs + ), f"Expected enc.shape == (t, {bs}, c) got {enc.shape}" + if encoder_out is not None and len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + + # embed positions + positions = None + if self.embed_positions is not None: + positions = self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.quant_noise is not None: + x = self.quant_noise(x) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + pos_seq = torch.arange(0, slen).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, _ = self.pos_emb(pos_seq, incremental_state) + else: + pos_k = None + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any(): + self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx) + + # decoder layers + attn: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, _ = layer( + x, + enc, + padding_mask, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer)), + need_head_weights=bool((idx == alignment_layer)), + pos_bias=pos_k, + ) + inner_states.append(x) + if layer_attn is not None and idx == alignment_layer: + attn = layer_attn.float().to(x) + + if attn is not None: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": [attn], "inner_states": inner_states} + + def output_layer(self, features): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + return self.output_projection(features) + else: + return features + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embed_positions is None: + return self.max_target_positions + return min(self.max_target_positions, self.embed_positions.max_positions) + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround. 
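For reference, a toy rendering of the `pos_seq` matrix built in `extract_features_scriptable` above (relative offsets i - j between target positions) and the slice used during incremental decoding; the target length of 4 is arbitrary.

```python
import torch

slen = 4
pos_seq = torch.arange(slen)[:, None] - torch.arange(slen)[None, :]
print(pos_seq)
# tensor([[ 0, -1, -2, -3],
#         [ 1,  0, -1, -2],
#         [ 2,  1,  0, -1],
#         [ 3,  2,  1,  0]])
# During incremental decoding, RelativePositionalEncoding keeps only the last
# row (pos_seq[-1:]), i.e. the offsets from the newest token to all earlier ones.
```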
+ if ( + self._future_mask.size(0) == 0 + or (not self._future_mask.device == tensor.device) + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(torch.zeros([dim, dim])), 1 + ) + self._future_mask = self._future_mask.to(tensor) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + + if f"{name}.output_projection.weight" not in state_dict: + if self.share_input_output_embed: + embed_out_key = f"{name}.embed_tokens.weight" + else: + embed_out_key = f"{name}.embed_out" + if embed_out_key in state_dict: + state_dict[f"{name}.output_projection.weight"] = state_dict[ + embed_out_key + ] + if not self.share_input_output_embed: + del state_dict[embed_out_key] + + for i in range(self.num_layers): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m + +class TransformerDecoderBaseScriptable(TransformerDecoderBase): + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + return x, None + + +class TransformerDecoder(TransformerDecoderBase): + def __init__( + self, + args, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + ): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + no_encoder_attn=no_encoder_attn, + output_projection=output_projection, + use_rel_pos_enc=getattr(args, "use_rel_pos_enc", False), + ) + + def build_output_projection(self, args, dictionary, embed_tokens): + super().build_output_projection( + TransformerConfig.from_namespace(args), dictionary, embed_tokens + ) + + def build_decoder_layer(self, args, no_encoder_attn=False): + return super().build_decoder_layer( + TransformerConfig.from_namespace(args), no_encoder_attn=no_encoder_attn + ) + +class TransformerDecoderScriptable(TransformerDecoder): + def extract_features( + self, + prev_output_tokens, + 
encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + return x, None diff --git a/SpeechT5/SpeechLM/speechlm/modules/transformer_encoder.py b/SpeechT5/SpeechLM/speechlm/modules/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..43dc7f82708de434d383b751084e04e2f89d0bd9 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/modules/transformer_encoder.py @@ -0,0 +1,403 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import math +from typing import Dict, List, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqEncoder +from fairseq.modules import ( + FairseqDropout, + LayerDropModuleList, + LayerNorm, + SinusoidalPositionalEmbedding, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor +from fairseq.models.transformer import ( + TransformerConfig, +) + + +from speechlm.modules import transformer_layer, LearnedPositionalEmbedding +from speechlm.modules.relative_pos_enc import RelativePositionalEncoding + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerEncoderBase": + return "TransformerEncoder" + else: + return module_name + + +class TransformerEncoderBase(FairseqEncoder): + """ + Transformer encoder consisting of *cfg.encoder.layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. 
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, cfg, dictionary, embed_tokens, use_rel_pos_enc=False, scaling_for_att=1.0): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.encoder_layerdrop = cfg.encoder.layerdrop + + embed_dim = embed_tokens.embedding_dim + self.padding_idx = embed_tokens.padding_idx + self.max_source_positions = cfg.max_source_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + self.embed_positions = ( + PositionalEmbedding( + cfg.max_source_positions, + embed_dim, + self.padding_idx, + learned=cfg.encoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + if self.encoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.encoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.use_rel_pos_enc = use_rel_pos_enc + self.scaling_for_att = scaling_for_att + self.layers.extend( + [self.build_encoder_layer(cfg) for i in range(cfg.encoder.layers)] + ) + self.num_layers = len(self.layers) + + if cfg.encoder.normalize_before: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(embed_dim // cfg.encoder.attention_heads, 160) + + def build_encoder_layer(self, cfg): + layer = transformer_layer.TransformerEncoderLayerBase(cfg, has_relative_attention_bias=self.use_rel_pos_enc, scaling_for_att=self.scaling_for_att) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward_embedding( + self, src_tokens, token_embedding: Optional[torch.Tensor] = None + ): + # embed tokens and positions + if token_embedding is None: + token_embedding = self.embed_tokens(src_tokens) + x = embed = self.embed_scale * token_embedding + if self.embed_positions is not None: + x = embed + self.embed_positions(src_tokens) + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + x = self.dropout_module(x) + if self.quant_noise is not None: + x = self.quant_noise(x) + return x, embed + + def forward( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + uniformity_layers: Optional[List[int]] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source 
sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. + """ + return self.forward_scriptable( + src_tokens, src_lengths, return_all_hiddens, token_embeddings, uniformity_layers + ) + + # TorchScript doesn't support super() method so that the scriptable Subclass + # can't access the base class model in Torchscript. + # Current workaround is to add a helper function with different name and + # call the helper function from scriptable Subclass. + def forward_scriptable( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + uniformity_layers: Optional[List[int]] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. 
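The dictionary layout described above follows the convention (noted in the code comments below) that every value is a list and an empty list stands in for `None`. A small, self-contained sketch of how a caller might unpack it; `unpack_encoder_out` and the dummy shapes are made up for illustration.

```python
from typing import Dict, List
import torch
from torch import Tensor

def unpack_encoder_out(out: Dict[str, List[Tensor]]):
    """Illustrative helper: an empty list is treated as None."""
    enc = out["encoder_out"][0] if len(out["encoder_out"]) > 0 else None
    pad = (out["encoder_padding_mask"][0]
           if len(out["encoder_padding_mask"]) > 0 else None)
    return enc, pad

dummy = {"encoder_out": [torch.zeros(7, 2, 768)], "encoder_padding_mask": []}
enc, pad = unpack_encoder_out(dummy)
print(enc.shape, pad)   # torch.Size([7, 2, 768]) None
```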
+ """ + # compute padding mask + encoder_padding_mask = src_tokens.eq(self.padding_idx) + has_pads = src_tokens.device.type == "xla" or encoder_padding_mask.any() + + x, encoder_embedding = self.forward_embedding(src_tokens, token_embeddings) + + # account for padding while computing the representation + if has_pads: + x = x * (1 - encoder_padding_mask.unsqueeze(-1).type_as(x)) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + encoder_states = [] + uniformity_hiddens = [] + + if return_all_hiddens: + encoder_states.append(x) + + if uniformity_layers is not None and 0 in uniformity_layers: + x = F.normalize(x.float(), dim=-1).type_as(x) + uniformity_hiddens.append(x) + + # encoder layers + for i, layer in enumerate(self.layers): + x = layer( + x, encoder_padding_mask=encoder_padding_mask if has_pads else None, + pos_bias=pos_k, + ) + if uniformity_layers is not None and i+1 in uniformity_layers: + x = F.normalize(x.float(), dim=-1).type_as(x) + uniformity_hiddens.append(x) + if return_all_hiddens: + assert encoder_states is not None + encoder_states.append(x) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # The Pytorch Mobile lite interpreter does not supports returning NamedTuple in + # `forward` so we use a dictionary instead. + # TorchScript does not support mixed values so the values are all lists. + # The empty list is equivalent to None. + src_lengths = ( + src_tokens.ne(self.padding_idx) + .sum(dim=1, dtype=torch.int32) + .reshape(-1, 1) + .contiguous() + ) + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [encoder_padding_mask], # B x T + "encoder_embedding": [encoder_embedding], # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "uniformity_hiddens": uniformity_hiddens, # List[T x B x C] + "src_tokens": [], + "src_lengths": [src_lengths], + } + + @torch.jit.export + def reorder_encoder_out(self, encoder_out: Dict[str, List[Tensor]], new_order): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if len(encoder_out["encoder_out"]) == 0: + new_encoder_out = [] + else: + new_encoder_out = [encoder_out["encoder_out"][0].index_select(1, new_order)] + if len(encoder_out["encoder_padding_mask"]) == 0: + new_encoder_padding_mask = [] + else: + new_encoder_padding_mask = [ + encoder_out["encoder_padding_mask"][0].index_select(0, new_order) + ] + if len(encoder_out["encoder_embedding"]) == 0: + new_encoder_embedding = [] + else: + new_encoder_embedding = [ + encoder_out["encoder_embedding"][0].index_select(0, new_order) + ] + + if len(encoder_out["src_tokens"]) == 0: + src_tokens = [] + else: + src_tokens = [(encoder_out["src_tokens"][0]).index_select(0, new_order)] + + if len(encoder_out["src_lengths"]) == 0: + src_lengths = [] + else: + src_lengths = [(encoder_out["src_lengths"][0]).index_select(0, new_order)] + + encoder_states = encoder_out["encoder_states"] + if len(encoder_states) > 0: + for idx, state in enumerate(encoder_states): + encoder_states[idx] = state.index_select(1, new_order) + + return { + "encoder_out": new_encoder_out, # T x B x C + "encoder_padding_mask": new_encoder_padding_mask, # B x T + "encoder_embedding": new_encoder_embedding, # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": src_tokens, # B x T + "src_lengths": src_lengths, # B x 1 + } + + def max_positions(self): + """Maximum input length supported by the encoder.""" + if self.embed_positions is None: + return self.max_source_positions + return min(self.max_source_positions, self.embed_positions.max_positions) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + print("deleting {0}".format(weights_key)) + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + for i in range(self.num_layers): + # update layer norms + self.layers[i].upgrade_state_dict_named( + state_dict, "{}.layers.{}".format(name, i) + ) + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + return state_dict + + +class TransformerEncoder(TransformerEncoderBase): + def __init__(self, args, dictionary, embed_tokens): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + use_rel_pos_enc=getattr(args, "use_rel_pos_enc", False), + scaling_for_att=getattr(args, "scaling_for_att", 1.0), + ) + + def build_encoder_layer(self, args): + return super().build_encoder_layer( + TransformerConfig.from_namespace(args), + ) + + +def PositionalEmbedding( + num_embeddings: int, + embedding_dim: int, + padding_idx: int, + learned: bool = False, +): + if learned: + # if padding_idx is specified then offset the embedding ids by + # this index and adjust num_embeddings appropriately + # TODO: The right place for this offset would be inside + # LearnedPositionalEmbedding. Move this there for a cleaner implementation. 
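`reorder_encoder_out` above exists for beam search: every cached tensor is re-indexed along its batch dimension with `new_order` (dim 1 for the `T x B x C` encoder output, dim 0 for masks and embeddings). A minimal illustration with made-up sizes:

```python
import torch

enc = torch.arange(2 * 3).view(1, 2, 3).float()   # T=1, B=2, C=3
new_order = torch.tensor([1, 1, 0, 0])            # beam size 2 per sentence
print(enc.index_select(1, new_order).shape)       # torch.Size([1, 4, 3])
```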
+ if padding_idx is not None: + num_embeddings = num_embeddings + padding_idx + 1 + m = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5) + if padding_idx is not None: + nn.init.constant_(m.weight[padding_idx], 0) + else: + m = SinusoidalPositionalEmbedding( + embedding_dim, + padding_idx, + init_size=num_embeddings + padding_idx + 1, + ) + return m diff --git a/SpeechT5/SpeechLM/speechlm/modules/transformer_layer.py b/SpeechT5/SpeechLM/speechlm/modules/transformer_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..1e3fa96e71426ab77e52bcee6ce052673b0875db --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/modules/transformer_layer.py @@ -0,0 +1,329 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/modules/transformer_layer.py + https://github.com/microsoft/SpeechT5/blob/main/Speech2C/speech2c/models/modules/transformer_decoder_layer.py +""" + +from typing import Dict, List, Optional + +import torch +from torch import Tensor +from fairseq.modules import LayerNorm +from speechlm.modules.multihead_attention import MultiheadAttention +from fairseq.modules.transformer_layer import TransformerEncoderLayerBase as FairseqTransformerEncoderLayerBase +from fairseq.modules.transformer_layer import TransformerDecoderLayerBase as FairseqTransformerDecoderLayerBase + + +class TransformerEncoderLayerBase(FairseqTransformerEncoderLayerBase): + """Encoder layer block. + + In the original paper each operation (multi-head attention or FFN) is + postprocessed with: `dropout -> add residual -> layernorm`. In the + tensor2tensor code they suggest that learning is more robust when + preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.encoder.normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + """ + + def __init__(self, cfg, has_relative_attention_bias=False, scaling_for_att=1.0): + self.scaling_for_att = scaling_for_att + super().__init__(cfg) + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim // cfg.encoder.attention_heads) + + def build_self_attention(self, embed_dim, cfg, scaling_for_att=1.0): + return MultiheadAttention( + embed_dim, + cfg.encoder.attention_heads, + dropout=cfg.attention_dropout, + self_attention=True, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + scaling_for_att=self.scaling_for_att, + ) + + def forward( + self, + x, + encoder_padding_mask: Optional[Tensor], + attn_mask: Optional[Tensor] = None, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, seq_len)` where padding elements are indicated by ``1``. + attn_mask (ByteTensor): binary tensor of shape `(tgt_len, src_len)`, + where `tgt_len` is the length of output and `src_len` is the + length of input, though here both are equal to `seq_len`. 
+ `attn_mask[tgt_i, src_j] = 1` means that when calculating the + embedding for `tgt_i`, we exclude (mask out) `src_j`. This is + useful for strided self-attention. + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + # anything in original attn_mask = 1, becomes -1e8 + # anything in original attn_mask = 0, becomes 0 + # Note that we cannot use -inf here, because at some edge cases, + # the attention weight (before softmax) for some padded element in query + # will become -inf, which results in NaN in model parameters + if attn_mask is not None: + attn_mask = attn_mask.masked_fill( + attn_mask.to(torch.bool), -1e8 if x.dtype == torch.float32 else -1e4 + ) + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, _ = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=encoder_padding_mask, + need_weights=False, + attn_mask=attn_mask, + position_bias=pos_bias, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + x = self.fc2(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + return x + + + +class TransformerDecoderLayerBase(FairseqTransformerDecoderLayerBase): + """Decoder layer block. + + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.decoder.normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
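A tiny numeric illustration of the masking convention described in the comments above (a mask value of 1 becomes a large negative constant rather than `-inf`, so even a fully masked query row yields finite softmax outputs instead of NaNs); the numbers are arbitrary:

```python
import torch

scores = torch.tensor([[2.0, 1.0, 0.5]])
attn_mask = torch.tensor([[0, 1, 0]])
scores = scores.masked_fill(attn_mask.bool(), -1e8)
print(torch.softmax(scores, dim=-1))   # masked position gets ~0 probability
```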
+ """ + + def __init__( + self, cfg, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False, has_relative_attention_bias=False, scaling_for_att=1.0, + ): + self.scaling_for_att = scaling_for_att + super().__init__(cfg, + no_encoder_attn, + add_bias_kv, + add_zero_attn, + ) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim // cfg.decoder.attention_heads) + + def build_self_attention( + self, embed_dim, cfg, add_bias_kv=False, add_zero_attn=False + ): + return MultiheadAttention( + embed_dim, + cfg.decoder.attention_heads, + dropout=cfg.attention_dropout, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=not cfg.cross_self_attention, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + scaling_for_att=self.scaling_for_att, + ) + + def build_encoder_attention(self, embed_dim, cfg): + return MultiheadAttention( + embed_dim, + cfg.decoder.attention_heads, + kdim=cfg.encoder.embed_dim, + vdim=cfg.encoder.embed_dim, + dropout=cfg.attention_dropout, + encoder_decoder_attention=True, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + scaling_for_att=self.scaling_for_att, + ) + + def forward( + self, + x, + encoder_out: Optional[torch.Tensor] = None, + encoder_padding_mask: Optional[torch.Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + prev_self_attn_state: Optional[List[torch.Tensor]] = None, + prev_attn_state: Optional[List[torch.Tensor]] = None, + self_attn_mask: Optional[torch.Tensor] = None, + self_attn_padding_mask: Optional[torch.Tensor] = None, + need_attn: bool = False, + need_head_weights: bool = False, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor, optional): binary + ByteTensor of shape `(batch, src_len)` where padding + elements are indicated by ``1``. + need_attn (bool, optional): return attention weights + need_head_weights (bool, optional): return attention weights + for each head (default: return average over heads). 
+ + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if need_head_weights: + need_attn = True + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + if prev_self_attn_state is not None: + prev_key, prev_value = prev_self_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_self_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_self_attn_state[2] + assert incremental_state is not None + self.self_attn._set_input_buffer(incremental_state, saved_state) + _self_attn_input_buffer = self.self_attn._get_input_buffer(incremental_state) + if self.cross_self_attention and not ( + incremental_state is not None + and _self_attn_input_buffer is not None + and "prev_key" in _self_attn_input_buffer + ): + if self_attn_mask is not None: + assert encoder_out is not None + self_attn_mask = torch.cat( + (x.new_zeros(x.size(0), encoder_out.size(0)), self_attn_mask), dim=1 + ) + if self_attn_padding_mask is not None: + if encoder_padding_mask is None: + assert encoder_out is not None + encoder_padding_mask = self_attn_padding_mask.new_zeros( + encoder_out.size(1), encoder_out.size(0) + ) + self_attn_padding_mask = torch.cat( + (encoder_padding_mask, self_attn_padding_mask), dim=1 + ) + assert encoder_out is not None + y = torch.cat((encoder_out, x), dim=0) + else: + y = x + + x, attn = self.self_attn( + query=x, + key=y, + value=y, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + if self.c_attn is not None: + tgt_len, bsz = x.size(0), x.size(1) + x = x.view(tgt_len, bsz, self.nh, self.head_dim) + x = torch.einsum("tbhd,h->tbhd", x, self.c_attn) + x = x.reshape(tgt_len, bsz, self.embed_dim) + if self.attn_ln is not None: + x = self.attn_ln(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + if self.encoder_attn is not None and encoder_out is not None: + residual = x + if self.normalize_before: + x = self.encoder_attn_layer_norm(x) + if prev_attn_state is not None: + prev_key, prev_value = prev_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_attn_state[2] + assert incremental_state is not None + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=need_attn or (not self.training and self.need_attn), + need_head_weights=need_head_weights, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.encoder_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + if self.ffn_layernorm is not None: + x = self.ffn_layernorm(x) + x = self.fc2(x) + x = self.dropout_module(x) + if self.w_resid is not None: + residual = torch.mul(self.w_resid, residual) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + if self.onnx_trace and 
incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + assert saved_state is not None + if self_attn_padding_mask is not None: + self_attn_state = [ + saved_state["prev_key"], + saved_state["prev_value"], + saved_state["prev_key_padding_mask"], + ] + else: + self_attn_state = [saved_state["prev_key"], saved_state["prev_value"]] + return x, attn, self_attn_state + return x, attn, None + + def make_generation_fast_(self, need_attn: bool = False, **kwargs): + self.need_attn = need_attn diff --git a/SpeechT5/SpeechLM/speechlm/modules/w2v_encoder.py b/SpeechT5/SpeechLM/speechlm/modules/w2v_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..9b8c15f1289d49581dbaae321fe569daeffb5242 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/modules/w2v_encoder.py @@ -0,0 +1,283 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + wav2vec encoder adding relitive position bias, modified from + https://github.com/microsoft/SpeechT5/blob/main/Speech2C/speech2c/models/modules/transformer_encoder.py + https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/wav2vec/wav2vec2.py +""" + +import math +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.dataclass import ChoiceEnum +from fairseq.modules import ( + LayerNorm, + SamePad, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.transformer_sentence_encoder import init_bert_params +from fairseq.utils import index_put +from fairseq.distributed import fsdp_wrap +from fairseq.models.wav2vec.utils import pad_to_multiple + +## reload multi-head attition with rel-pos-bias +from fairseq.models.wav2vec.wav2vec2 import TransformerEncoder as W2vTransformerEncoder +from speechlm.modules.relative_pos_enc import RelativePositionalEncoding +from speechlm.modules.multihead_attention import MultiheadAttention + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) + + +class TransformerEncoder(W2vTransformerEncoder): + def __init__(self, args): + super().__init__(args) + + self.dropout = args.dropout + self.embedding_dim = args.encoder_embed_dim + self.required_seq_len_multiple = args.required_seq_len_multiple + self.use_rel_pos_enc = getattr(args, "use_rel_pos_enc", False) + + self.pos_conv = nn.Conv1d( + self.embedding_dim, + self.embedding_dim, + kernel_size=args.conv_pos, + padding=args.conv_pos // 2, + groups=args.conv_pos_groups, + ) + dropout = 0 + std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * self.embedding_dim)) + nn.init.normal_(self.pos_conv.weight, mean=0, std=std) + nn.init.constant_(self.pos_conv.bias, 0) + + self.pos_conv = nn.utils.weight_norm(self.pos_conv, name="weight", dim=2) + self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU()) + + layers = [] + for _ in range(args.encoder_layers): + layer = TransformerSentenceEncoderLayer( + embedding_dim=self.embedding_dim, + 
ffn_embedding_dim=args.encoder_ffn_embed_dim, + num_attention_heads=args.encoder_attention_heads, + dropout=self.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + layer_norm_first=args.layer_norm_first, + has_relative_attention_bias=self.use_rel_pos_enc, + ) + if args.checkpoint_activations: + layer = fsdp_wrap(layer) + layer = checkpoint_wrapper(layer) + layers.append(layer) + self.layers = nn.ModuleList(layers) + + self.layer_norm_first = args.layer_norm_first + self.layer_norm = LayerNorm(self.embedding_dim) + self.layerdrop = args.encoder_layerdrop + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(args.encoder_embed_dim // args.encoder_attention_heads, 160) + + + self.apply(init_bert_params) + + def forward(self, x, padding_mask=None, layer=None): + x, layer_results = self.extract_features(x, padding_mask, layer) + + if self.layer_norm_first and layer is None: + x = self.layer_norm(x) + + return x, layer_results + + def extract_features(self, x, padding_mask=None, tgt_layer=None): + + if padding_mask is not None: + x = index_put(x, padding_mask, 0) + + x_conv = self.pos_conv(x.transpose(1, 2)) + x_conv = x_conv.transpose(1, 2) + x = x + x_conv + + if not self.layer_norm_first: + x = self.layer_norm(x) + + # pad to the sequence length dimension + x, pad_length = pad_to_multiple( + x, self.required_seq_len_multiple, dim=-2, value=0 + ) + if pad_length > 0 and padding_mask is None: + padding_mask = x.new_zeros((x.size(0), x.size(1)), dtype=torch.bool) + padding_mask[:, -pad_length:] = True + else: + padding_mask, _ = pad_to_multiple( + padding_mask, self.required_seq_len_multiple, dim=-1, value=True + ) + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + if self.use_rel_pos_enc: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + layer_results = [] + r = None + for i, layer in enumerate(self.layers): + dropout_probability = np.random.random() + if not self.training or (dropout_probability > self.layerdrop): + x, z = layer(x, self_attn_padding_mask=padding_mask, need_weights=False, pos_bias=pos_k) + if tgt_layer is not None: + # unpad if needed + if pad_length > 0: + layer_results.append( + ( + x[:-pad_length], + z[:, :-pad_length, :-pad_length] + if z is not None + else z, + ) + ) + else: + layer_results.append((x, z)) + if i == tgt_layer: + r = x + break + + if r is not None: + x = r + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + # undo paddding + if pad_length > 0: + x = x[:, :-pad_length] + + return x, layer_results + + +class TransformerSentenceEncoderLayer(nn.Module): + """ + Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained + models. 
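`extract_features` above right-pads the sequence so its length becomes a multiple of `required_seq_len_multiple` (via fairseq's `pad_to_multiple`), marks the added frames in the padding mask, and trims them off again at the end. A simplified stand-in; `toy_pad_to_multiple` and the shapes are invented for illustration:

```python
import torch
import torch.nn.functional as F

def toy_pad_to_multiple(x, multiple):
    """Right-pad the time dimension of a B x T x C tensor to a multiple."""
    T = x.size(1)
    pad = (multiple - T % multiple) % multiple
    return F.pad(x, (0, 0, 0, pad)), pad

x = torch.randn(2, 13, 768)                   # B x T x C
x_padded, pad_len = toy_pad_to_multiple(x, multiple=8)
print(x_padded.shape, pad_len)                # torch.Size([2, 16, 768]) 3
```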
+ """ + + def __init__( + self, + embedding_dim: float = 768, + ffn_embedding_dim: float = 3072, + num_attention_heads: float = 8, + dropout: float = 0.1, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + activation_fn: str = "relu", + layer_norm_first: bool = False, + has_relative_attention_bias: bool = False, + ) -> None: + + super().__init__() + # Initialize parameters + self.embedding_dim = embedding_dim + self.dropout = dropout + self.activation_dropout = activation_dropout + + # Initialize blocks + self.activation_fn = utils.get_activation_fn(activation_fn) + self.self_attn = MultiheadAttention( + self.embedding_dim, + num_attention_heads, + dropout=attention_dropout, + self_attention=True, + ) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(self.activation_dropout) + self.dropout3 = nn.Dropout(dropout) + + self.layer_norm_first = layer_norm_first + + # layer norm associated with the self attention layer + self.self_attn_layer_norm = LayerNorm(self.embedding_dim) + self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim) + self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim) + + # layer norm associated with the position wise feed-forward NN + self.final_layer_norm = LayerNorm(self.embedding_dim) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embedding_dim//num_attention_heads) + + def forward( + self, + x: torch.Tensor, + self_attn_mask: torch.Tensor = None, + self_attn_padding_mask: torch.Tensor = None, + need_weights: bool = False, + att_args=None, + pos_bias=None, + ): + """ + LayerNorm is applied either before or after the self-attention/ffn + modules similar to the original Transformer imlementation. + """ + residual = x + + if self.layer_norm_first: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + x = self.dropout1(x) + x = residual + x + + residual = x + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + else: + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + position_bias=pos_bias, + ) + + x = self.dropout1(x) + x = residual + x + + x = self.self_attn_layer_norm(x) + + residual = x + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + x = self.final_layer_norm(x) + + return x, attn diff --git a/SpeechT5/SpeechLM/speechlm/scripts/pretrain_speechlm/base_speechlmh.sh b/SpeechT5/SpeechLM/speechlm/scripts/pretrain_speechlm/base_speechlmh.sh new file mode 100644 index 0000000000000000000000000000000000000000..650f7dc43175f33c518d3fa294e32683aa77518c --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/pretrain_speechlm/base_speechlmh.sh @@ -0,0 +1,43 @@ +# #################################### +# SpeechLM-H Base model # +# #################################### +[ $# -lt 2 ] && echo "Usage: $0 [mount=${PWD}] [world_size=32] [update_freq=1]" && exit 1 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +mount=$3 +world_size=$4 +update_freq=$5 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/base_speechlmh_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechlm/config/pretrain \ + --config-name speechlm_base_librispeech \ + common.user_dir=$CODE_ROOT/speechlm \ + \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + dataset.train_subset=\"train_960+train_text.km-ltr\" \ + dataset.valid_subset=\"dev_clean+dev_clean.km-ltr\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1400000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=pretrain + +# data_dir="/mnt/default/v-ziqzhang/data/stbert/data/librispeech/hubert_release_iter2_layer9_kmeans/local" +# text_data_dir="/mnt/default/v-ziqzhang/dataset/LibriLM/from_fastT2U/bin-idx" diff --git a/SpeechT5/SpeechLM/speechlm/scripts/pretrain_speechlm/base_speechlmp.sh b/SpeechT5/SpeechLM/speechlm/scripts/pretrain_speechlm/base_speechlmp.sh new file mode 100644 index 0000000000000000000000000000000000000000..2e0a81290f5416876f5f3dbf0884d5dd1af0fb16 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/pretrain_speechlm/base_speechlmp.sh @@ -0,0 +1,43 @@ +# #################################### +# SpeechLM-P Base model # +# #################################### +[ $# -lt 2 ] && echo "Usage: $0 [mount=${PWD}] [world_size=32] [update_freq=1]" && exit 1 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +mount=$3 +world_size=$4 +update_freq=$5 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/base_speechlmp_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechlm/config/pretrain \ + --config-name speechlm_base_librispeech \ + common.user_dir=$CODE_ROOT/speechlm \ + \ + task.labels='["phn"]' \ + model.label_rate=100 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + dataset.train_subset=\"train_960+train_text.phn-ltr\" \ + dataset.valid_subset=\"dev_clean+dev_clean.phn-ltr\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1400000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=pretrain + +# data_dir="/stdblob/users/v-ziqzhang/dataset/LibriLM/phn2char_sanych/tri4b_mono_label" +# text_data_dir="/stdblob/users/v-ziqzhang/dataset/LibriLM/phn2char_sanych/filt2k_sil025_m5std25_sil14_spn32/bin-idx" diff --git a/SpeechT5/SpeechLM/speechlm/scripts/pretrain_speechlm/large_speechlmp.sh b/SpeechT5/SpeechLM/speechlm/scripts/pretrain_speechlm/large_speechlmp.sh new file mode 100644 index 0000000000000000000000000000000000000000..75fc15fc7802217f4a0a8cab80ebefc7ddb8148a --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/pretrain_speechlm/large_speechlmp.sh @@ -0,0 +1,44 @@ +# #################################### +# SpeechLM-P Large model # +# #################################### +[ $# -lt 2 ] && echo "Usage: $0 [mount=${PWD}] [world_size=32] [update_freq=4]" && exit 1 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" 
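As a rough way to read the batch-size settings in the Base pre-training recipes above (`dataset.max_tokens`, `distributed_training.distributed_world_size`, `optimization.update_freq`): assuming `max_tokens` counts raw 16 kHz waveform samples per GPU, as is usual for fairseq audio pre-training, the effective batch per optimizer step works out as sketched below; the arithmetic is illustrative only.

```python
max_tokens = 1_400_000    # dataset.max_tokens per GPU
world_size = 32           # distributed_training.distributed_world_size
update_freq = 1           # optimization.update_freq
tokens_per_update = max_tokens * world_size * update_freq
print(f"{tokens_per_update:,} samples (~{tokens_per_update / 16000 / 3600:.1f} h "
      f"of 16 kHz audio) per update")
```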
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +mount=$3 +world_size=$4 +update_freq=$5 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=4 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/large_speechlmp_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechlm/config/pretrain \ + --config-name speechlm_large_librilight \ + common.user_dir=$CODE_ROOT/speechlm \ + \ + task.labels='["phn"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + dataset.train_subset=\"train_60k+train_text.phn-ltr\" \ + dataset.valid_subset=\"dev_clean+dev_clean.phn-ltr\" \ + dataset.num_workers=1 \ + dataset.max_tokens=900000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.fp16_scale_tolerance=0.1 \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=pretrain + +# data_dir="/stdblob/users/v-ziqzhang/dataset/librilight/chunkdata" +# text_data_dir="/stdblob/users/v-ziqzhang/dataset/LibriLM/phn2char_sanych/filt2k_sil025_m5std25_sil14_spn32/bin-idx" diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tokenizer_fastT2U/generate.sh b/SpeechT5/SpeechLM/speechlm/scripts/tokenizer_fastT2U/generate.sh new file mode 100644 index 0000000000000000000000000000000000000000..17d07646076376e68c681dc31427166b36b1faef --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tokenizer_fastT2U/generate.sh @@ -0,0 +1,42 @@ +##################################### +# Fast Text2Unit Model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [outdir={gen_set%/*}]" && exit 0 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" && exit 1 + +model_path=$1 +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +gen_set=$2 +outdir=$3 + +DATA_DIR=${gen_set%/*} +gen_set=${gen_set##*/} +[ -z $outdir ] && outdir=${DATA_DIR} + +CODE_ROOT=${PWD} + +nj=4 +for rank in $(seq 0 $((nj-1))); do + results_path=$outdir/pseudo_${gen_set}/${rank} + [ ! -d $results_path ] && mkdir -p $results_path + echo "$model_path" > $results_path/model.record + + python $CODE_ROOT/speechlm/generate_unit.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechlm \ + --config-yaml config_generate.yaml \ + --path ${model_path} \ + --task fast_text_to_unit \ + --gen-subset $gen_set \ + \ + --beam 1 \ + --max-tokens 10000 \ + --results-path $results_path \ + --scoring sacrebleu \ + --skip-invalid-size-inputs-valid-test \ + --distributed-world-size $nj --distributed-rank ${rank} \ + & +done +wait diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tokenizer_fastT2U/infer.sh b/SpeechT5/SpeechLM/speechlm/scripts/tokenizer_fastT2U/infer.sh new file mode 100644 index 0000000000000000000000000000000000000000..306ee866d46c7e827ebe62a9151c070e969b953f --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tokenizer_fastT2U/infer.sh @@ -0,0 +1,41 @@ +##################################### +# Fast Text2Unit Model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 " && exit 0 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" 
&& exit 1 + +model_path=$1 +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +gen_set=$2 + +DATA_DIR=${gen_set%/*} +gen_set=${gen_set##*/} +outdir=$src_dir/decode_${cpt} + +CODE_ROOT=${PWD} + +for subset in ${gen_set//,/ }; do + results_path=$outdir/phone2unit_${subset} + [ ! -d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/speechlm/generate_unit.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechlm \ + --config-yaml config.yaml \ + --path ${model_path} \ + --task fast_text_to_unit \ + --gen-subset $subset \ + \ + --beam 1 \ + --max-tokens 10000 \ + --results-path $results_path \ + --scoring sacrebleu + + echo $results_path + tail -n 1 $results_path/generate-*.txt + sleep 1s +done + +# --distributed-world-size 1000 --distributed-rank 0 \ diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tokenizer_fastT2U/train_s_5e-4.sh b/SpeechT5/SpeechLM/speechlm/scripts/tokenizer_fastT2U/train_s_5e-4.sh new file mode 100644 index 0000000000000000000000000000000000000000..6fec89b8a7cb15b334ad2d5dd5cf76ce44f811b4 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tokenizer_fastT2U/train_s_5e-4.sh @@ -0,0 +1,39 @@ +##################################### +# Fast Text2Unit Model # +##################################### +[ $# -lt 1 ] && echo "Usage: $0 [mount] [world_size=4] [update_freq=1]" && exit 0 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" && exit 1 + +DATA_DIR=$1 +mount=$2 +world_size=$3 +update_freq=$4 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=4 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} +MODEL_DIR="$mount/exp/fast_text2unit/small_lr5e-4_tristage_ls0.1_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +fairseq-train ${DATA_DIR} --save-dir ${MODEL_DIR} \ + --config-yaml config.yaml \ + --user-dir $CODE_ROOT/speechlm \ + --train-subset train_100 --valid-subset dev_clean \ + --num-workers 4 --max-tokens 20000 \ + --distributed-world-size ${world_size} --update-freq ${update_freq} \ + \ + --task fast_text_to_unit --criterion fasttext2unit_criterion --arch fasttext2unit_s \ + --label-smoothing 0.1 \ + \ + --clip-norm 5.0 --n-frames-per-step 1 \ + --dropout 0.1 --attention-dropout 0.1 \ + --optimizer adam --lr 5e-4 --lr-scheduler tri_stage --phase-ratio [0.3,0.0,0.7] --max-update 10000 \ + --seed 1 --best-checkpoint-metric accuracy --maximize-best-checkpoint-metric \ + \ + --save-interval 2 \ + --tensorboard-logdir ${MODEL_DIR} \ + --fp16 --find-unused-parameters \ + | tee ${MODEL_DIR}/train.log + +# DATA_DIR=/mnt/default/v-ziqzhang/dataset/librispeech_phone2unit/phone2unit diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/finetune_base_ctc.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/finetune_base_ctc.sh new file mode 100644 index 0000000000000000000000000000000000000000..4b7c542fe9e8688a599eb52058b442edac613317 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/finetune_base_ctc.sh @@ -0,0 +1,48 @@ +# #################################### +# SpeechLM Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=1]" && exit 1 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" 
&& exit 1 + +w2v_path=$1 +DATA_DIR=$2 +cpt=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="${mount}/exp/finetune_asr/$exp_name/ctc30k_from_${cpt}_bz1.6m_lr1e-5" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechlm/config/finetune \ + --config-name speechlm_base_100h \ + common.user_dir=$CODE_ROOT/speechlm \ + \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + model.w2v_path=${w2v_path} \ + \ + optimization.lr=[0.00001] \ + optimization.max_update=30000 \ + dataset.max_tokens=1600000 \ + optimization.update_freq=[${update_freq}] \ + distributed_training.distributed_world_size=${world_size} \ + \ + dataset.train_subset="train_clean_100" \ + dataset.valid_subset="dev_other" \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=${exp_name} + +# model_path=/mnt/default/v-ziqzhang/data/speechulm/exp/base/base_speechlmp_32gpu_1accum/checkpoint_298_400000.pt +# data_dir=/home/v-ziqzhang/dataset/LibriSpeech/asr diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/finetune_large_ctc.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/finetune_large_ctc.sh new file mode 100644 index 0000000000000000000000000000000000000000..c07919000a3209cc75501e97ffb3559a907c1f1e --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/finetune_large_ctc.sh @@ -0,0 +1,48 @@ +# #################################### +# SpeechLM Large model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=4]" && exit 1 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" 
&& exit 1 + +w2v_path=$1 +DATA_DIR=$2 +cpt=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=4 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="${mount}/exp/finetune_asr/$exp_name/ctc200k_from_${cpt}_bz3.6m_lr1e-5" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechlm/config/finetune \ + --config-name speechlm_large_960h \ + common.user_dir=$CODE_ROOT/speechlm \ + \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + model.w2v_path=${w2v_path} \ + \ + optimization.lr=[0.00001] \ + optimization.max_update=200000 \ + dataset.max_tokens=900000 \ + optimization.update_freq=[${update_freq}] \ + distributed_training.distributed_world_size=${world_size} \ + \ + dataset.train_subset="train_960" \ + dataset.valid_subset="dev_other" \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=${exp_name} + +# model_path=/mnt/default/v-ziqzhang/data/speechulm/exp/large/large_speechlmp_32gpu_4accum/checkpoint_31_400000.pt +# data_dir=/home/v-ziqzhang/dataset/LibriSpeech/asr diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc.sh new file mode 100644 index 0000000000000000000000000000000000000000..4c331d603168099b2a643fa631f74ea647f11173 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc.sh @@ -0,0 +1,40 @@ +##################################### +# SpeechLM Base model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_clean,dev_other,test_clean,test_other]" && exit 1 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +[ -z $gen_set ] && gen_set="dev_clean,dev_other,test_clean,test_other" +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}_ctc/${subset} + [ ! -d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/speechlm/infer.py \ + --config-dir $CODE_ROOT/speechlm/config/decode \ + --config-name infer_viterbi \ + common.user_dir=$CODE_ROOT/speechlm \ + \ + dataset.gen_subset=${subset} \ + task.data=$DATA_DIR task.label_dir=$DATA_DIR task.normalize=false \ + common_eval.results_path=${results_path} common_eval.path=${model_path} \ + \ + common_eval.quiet=true \ + & +done +wait + +### important to know +# When loading the fine-tuned model for decoding, fairseq also loads the pre-trained model to use its states['model'] to build the model instance. +# To prevent the error about the w2v_path (if you don't have the pre-trained model at w2v_path), we set common_eval.model_overrides to override +# the w2v_path by speechlmp_base_cfg.pt. speechlmp_base_cfg.pt is just a pre-trained model checkpoint without parameters (only contains config). +# So, if you have trained a model with different model config (e.g. different encoder layers), you should modify the common_eval.model_overrides to your own. 
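+# The (already provided) override below is an example of this; append it to the python command
+# above if the original w2v_path is not available on your machine: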
+ # common_eval.model_overrides=\"{\'w2v_path\':\'$CODE_ROOT/speechlm/config/pretrain/speechlmp_base_cfg.pt\'}\" \ diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc_kenlm.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc_kenlm.sh new file mode 100644 index 0000000000000000000000000000000000000000..3dfce021bb4948de5f37ede7d67c9386fa874d8a --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc_kenlm.sh @@ -0,0 +1,48 @@ +##################################### +# SpeechLM Base model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_clean,dev_other,test_clean,test_other]" && exit 1 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +[ -z $gen_set ] && gen_set="dev_clean,dev_other,test_clean,test_other" +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} +path_to_lexicon=${DATA_DIR}/librispeech_lexicon.lst +path_to_lm=${DATA_DIR}/4-gram.arpa +[ ! -f $path_to_lexicon ] && echo "Error: $path_to_lexicon not found !" && exit 1 +[ ! -f $path_to_lm ] && echo "Error: $path_to_lm not found !" && exit 1 + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}_ctc/${subset} + [ ! -d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/speechlm/infer.py \ + --config-dir $CODE_ROOT/speechlm/config/decode \ + --config-name infer_kenlm \ + common.user_dir=$CODE_ROOT/speechlm \ + \ + dataset.gen_subset=${subset} \ + task.data=$DATA_DIR task.label_dir=$DATA_DIR task.normalize=false \ + common_eval.results_path=${results_path} common_eval.path=${model_path} \ + \ + decoding.lexicon=$path_to_lexicon \ + decoding.lmpath=$path_to_lm \ + decoding.beam=1500 \ + \ + common_eval.quiet=false \ + & +done +wait + +### important to know +# When loading the fine-tuned model for decoding, fairseq also loads the pre-trained model to use its states['model'] to build the model instance. +# To prevent the error about the w2v_path (if you don't have the pre-trained model at w2v_path), we set common_eval.model_overrides to override +# the w2v_path by speechlmp_base_cfg.pt. speechlmp_base_cfg.pt is just a pre-trained model checkpoint without parameters (only contains config). +# So, if you have trained a model with different model config (e.g. different encoder layers), you should modify the common_eval.model_overrides to your own. + # common_eval.model_overrides=\"{\'w2v_path\':\'$CODE_ROOT/speechlm/config/pretrain/speechlmp_base_cfg.pt\'}\" \ diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc_large.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc_large.sh new file mode 100644 index 0000000000000000000000000000000000000000..265476a05a3225feafe00bad14b1615402c432b3 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc_large.sh @@ -0,0 +1,36 @@ +##################################### +# SpeechLM Large model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_clean,dev_other,test_clean,test_other]" && exit 1 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" 
&& exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +[ -z $gen_set ] && gen_set="dev_clean,dev_other,test_clean,test_other" +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}_ctc/${subset} + [ ! -d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/speechlm/infer.py \ + --config-dir $CODE_ROOT/speechlm/config/decode \ + --config-name infer_viterbi \ + common.user_dir=$CODE_ROOT/speechlm \ + \ + dataset.gen_subset=${subset} \ + task.data=$DATA_DIR task.label_dir=$DATA_DIR task.normalize=true \ + common_eval.results_path=${results_path} common_eval.path=${model_path} \ + \ + common_eval.quiet=true \ + & +done +wait + +# model_path=/mnt/default/v-ziqzhang/data/speechulm/finetune_asr/large_speechlmp_32gpu_4accum/ctc200k_from_400k_bz3.6m_lr1e-5/checkpoint_convert.pt +# data_dir=/home/v-ziqzhang/dataset/LibriSpeech/asr diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc_large_fsqlm.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc_large_fsqlm.sh new file mode 100644 index 0000000000000000000000000000000000000000..165dd29ee7efcf78c6efb568432049be9b7512f7 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_asr/inference_ctc_large_fsqlm.sh @@ -0,0 +1,46 @@ +##################################### +# SpeechLM Large model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_clean,dev_other,test_clean,test_other]" && exit 1 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +[ -z $gen_set ] && gen_set="dev_clean,dev_other,test_clean,test_other" +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} +path_to_lexicon=${DATA_DIR}/librispeech_lexicon.lst +path_to_lm=${DATA_DIR}/fairseq_word_lm/lm_librispeech_word_transformer.pt +[ ! -f $path_to_lexicon ] && echo "Error: $path_to_lexicon not found !" && exit 1 +[ ! -f $path_to_lm ] && echo "Error: $path_to_lm not found !" && exit 1 + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}_ctc/${subset} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/speechlm/infer.py \ + --config-dir $CODE_ROOT/speechlm/config/decode \ + --config-name infer_fsqlm \ + common.user_dir=$CODE_ROOT/speechlm \ + \ + dataset.gen_subset=${subset} \ + task.data=$DATA_DIR task.label_dir=$DATA_DIR task.normalize=true \ + common_eval.results_path=${results_path} common_eval.path=${model_path} \ + \ + decoding.lexicon=$path_to_lexicon \ + decoding.lmpath=$path_to_lm \ + decoding.lmweight=0.90 \ + decoding.wordscore=-0.31 \ + decoding.beam=500 \ + \ + common_eval.quiet=false \ + & +done +wait + +# model_path=/mnt/default/v-ziqzhang/data/speechulm/finetune_asr/large_speechlmp_32gpu_4accum/ctc200k_from_400k_bz3.6m_lr1e-5/checkpoint_convert.pt +# data_dir=/home/v-ziqzhang/dataset/LibriSpeech/asr diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/ft_base_covost_enxx.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/ft_base_covost_enxx.sh new file mode 100644 index 0000000000000000000000000000000000000000..3b8c12a822549d780622c006247463a845217e50 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/ft_base_covost_enxx.sh @@ -0,0 +1,80 @@ +# #################################### +# SpeechLM Base model # +# #################################### +[ $# -lt 4 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=2]" && exit 0 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" && exit 1 + +w2v_path=$1 +DATA_DIR=$2 +lang=$3 +cpt=$4 +mount=$5 +world_size=$6 +update_freq=$7 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=2 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="$mount/exp/finetune_covost/$exp_name/legacy_en${lang}_from_${cpt}_bz3.2m_lr1e-4" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +max_tokens=1600000 +python $CODE_ROOT/fairseq/fairseq_cli/train.py ${DATA_DIR} \ + --save-dir ${MODEL_DIR} \ + --user-dir $CODE_ROOT/speechlm \ + --task speech_to_text \ + --config-yaml config_base_en${lang}.yaml \ + --train-subset "train_st_en_${lang}_local" \ + --valid-subset "dev_st_en_${lang}_local" \ + --fp16 \ + --seed 1 \ + \ + --ddp-backend no_c10d \ + --distributed-world-size ${world_size} \ + --tensorboard-logdir ${MODEL_DIR} \ + \ + --criterion label_smoothed_cross_entropy --report-accuracy \ + --label-smoothing 0.1 \ + \ + --optimizer adam \ + --clip-norm 1.0 \ + --lr 1e-04 \ + --lr-scheduler polynomial_decay --warmup-updates 5000 \ + --max-update 50000 \ + --total-num-update 50000 \ + --update-freq ${update_freq} \ + \ + --max-tokens ${max_tokens} \ + --max-sentences 16 \ + --max-tokens-valid ${max_tokens} \ + --grouped-shuffling \ + --max-source-positions ${max_tokens} \ + --skip-invalid-size-inputs-valid-test \ + --num-workers 0 \ + --best-checkpoint-metric "accuracy" \ + --maximize-best-checkpoint-metric \ + \ + --arch "speechlm_st_legacy" \ + --w2v-path ${w2v_path} \ + --layerdrop 0.1 \ + --decoder-layerdrop 0.1 \ + --activation-dropout 0.0 \ + --attention-dropout 0.1 \ + --feature-grad-mult 1.0 \ + \ + --apply-mask --mask-prob 0.5 \ + \ + --log-format json \ + --log-interval 100 \ + --save-interval 1 \ + --keep-last-epochs 5 \ + --keep-best-checkpoints 5 \ + \ + 2>&1 | tee ${MODEL_DIR}/train.log + +# model_path=/mnt/default/v-ziqzhang/data/speechulm/exp/base/base_speechlmp_32gpu_1accum/checkpoint_298_400000.pt +# data_dir=${HOME}/dataset/CommonVoice/v4/en/en-de diff --git 
a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/ft_large_covost_enxx.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/ft_large_covost_enxx.sh new file mode 100644 index 0000000000000000000000000000000000000000..4e79bec834dec3ad4828954b3d0100adeaa80909 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/ft_large_covost_enxx.sh @@ -0,0 +1,80 @@ +# #################################### +# SpeechLM Large model # +# #################################### +[ $# -lt 4 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=4]" && exit 0 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" && exit 1 + +w2v_path=$1 +DATA_DIR=$2 +lang=$3 +cpt=$4 +mount=$5 +world_size=$6 +update_freq=$7 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=4 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="$mount/exp/finetune_covost/$exp_name/legacy_en${lang}_from_${cpt}_bz3.6m_lr1e-4" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +max_tokens=900000 +python $CODE_ROOT/fairseq/fairseq_cli/train.py ${DATA_DIR} \ + --save-dir ${MODEL_DIR} \ + --user-dir $CODE_ROOT/speechlm \ + --task speech_to_text \ + --config-yaml config_large_en${lang}.yaml \ + --train-subset "train_st_en_${lang}_local" \ + --valid-subset "dev_st_en_${lang}_local" \ + --fp16 \ + --seed 1 \ + \ + --ddp-backend no_c10d \ + --distributed-world-size ${world_size} \ + --tensorboard-logdir ${MODEL_DIR} \ + \ + --criterion label_smoothed_cross_entropy --report-accuracy \ + --label-smoothing 0.1 \ + \ + --optimizer adam \ + --clip-norm 1.0 \ + --lr 1e-04 \ + --lr-scheduler polynomial_decay --warmup-updates 5000 \ + --max-update 50000 \ + --total-num-update 50000 \ + --update-freq ${update_freq} \ + \ + --max-tokens ${max_tokens} \ + --max-sentences 16 \ + --max-tokens-valid ${max_tokens} \ + --grouped-shuffling \ + --max-source-positions ${max_tokens} \ + --skip-invalid-size-inputs-valid-test \ + --num-workers 0 \ + --best-checkpoint-metric "accuracy" \ + --maximize-best-checkpoint-metric \ + \ + --arch "speechlm_st_legacy" \ + --w2v-path ${w2v_path} --encoder-embed-dim 1024 \ + --layerdrop 0.1 \ + --decoder-layerdrop 0.1 \ + --activation-dropout 0.0 \ + --attention-dropout 0.1 \ + --feature-grad-mult 1.0 \ + \ + --apply-mask --mask-prob 0.5 \ + \ + --log-format json \ + --log-interval 100 \ + --save-interval 1 \ + --keep-last-epochs 5 \ + --keep-best-checkpoints 5 \ + \ + 2>&1 | tee ${MODEL_DIR}/train.log + +# model_path=/mnt/default/v-ziqzhang/data/speechulm/exp/large/large_speechlmp_32gpu_4accum/checkpoint_31_400000.pt +# data_dir=${HOME}/dataset/CommonVoice/v4/en/en-de diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/inference_base.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/inference_base.sh new file mode 100644 index 0000000000000000000000000000000000000000..513f99fdf897ff84d339e3a1be8407a3c51e8fe7 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/inference_base.sh @@ -0,0 +1,46 @@ +# #################################### +# SpeechLM Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [gen-set=dev] [beam_size=5] [lenpen=1.0]" && exit 0 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" 
&& exit 1 + +model_path=$1 +DATA_DIR=$2 +lang=$3 +gen_set=$4 +beam_size=$5 +lenpen=$6 +[ -z $gen_set ] && gen_set="dev" +[ -z $beam_size ] && beam_size=5 +[ -z $lenpen ] && lenpen=1 +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} +results_path=$src_dir/decode_${cpt}_beam${beam_size}/${gen_set} +[ ! -d $results_path ] && mkdir -p $results_path + +python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --gen-subset ${gen_set}_st_en_${lang}_local \ + --max-tokens 2300000 \ + --max-source-positions 2300000 \ + --num-workers 0 \ + \ + --user-dir $CODE_ROOT/speechlm \ + --task speech_to_text \ + --config-yaml config_base_en${lang}.yaml \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring sacrebleu --max-len-a 0 --max-len-b 512 \ + --beam ${beam_size} \ + --lenpen $lenpen \ + + echo $results_path + tail -n 1 $results_path/generate-*.txt + sleep 1s + +# model_path=/mnt/default/v-ziqzhang/data/speechulm/finetune_covost/base_speechlmp_32gpu_1accum/legacy_ende_from_400k_bz3.2m_lr1e-4/checkpoint_best_convert.pt +# data_dir=dataset/CommonVoice/v4/en/en-de diff --git a/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/inference_large.sh b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/inference_large.sh new file mode 100644 index 0000000000000000000000000000000000000000..6957ad58c487e403bb38c05b7315f71cd63cfa8e --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/scripts/tune_speechlm_st/inference_large.sh @@ -0,0 +1,46 @@ +# #################################### +# SpeechLM Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [gen-set=dev] [beam_size=5] [lenpen=1.0]" && exit 0 +[ ${PWD##*/} != SpeechLM ] && echo "Error: dir not match! Switch to SpeechLM/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +lang=$3 +gen_set=$4 +beam_size=$5 +lenpen=$6 +[ -z $gen_set ] && gen_set="dev" +[ -z $beam_size ] && beam_size=5 +[ -z $lenpen ] && lenpen=1 +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} +results_path=$src_dir/decode_${cpt}_beam${beam_size}/${gen_set} +[ ! 
-d $results_path ] && mkdir -p $results_path + +python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --gen-subset ${gen_set}_st_en_${lang}_local \ + --max-tokens 2300000 \ + --max-source-positions 2300000 \ + --num-workers 0 \ + \ + --user-dir $CODE_ROOT/speechlm \ + --task speech_to_text \ + --config-yaml config_large_en${lang}.yaml \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring sacrebleu --max-len-a 0 --max-len-b 512 \ + --beam ${beam_size} \ + --lenpen $lenpen \ + + echo $results_path + tail -n 1 $results_path/generate-*.txt + sleep 1s + +# model_path=/mnt/default/v-ziqzhang/data/speechulm/finetune_covost/large_speechlmp_32gpu_4accum/legacy_ende_from_400k_bz3.6m_lr1e-4/checkpoint.avgnbest_convert.pt +# data_dir=dataset/CommonVoice/v4/en/en-de diff --git a/SpeechT5/SpeechLM/speechlm/tasks/fast_text_to_unit.py b/SpeechT5/SpeechLM/speechlm/tasks/fast_text_to_unit.py new file mode 100644 index 0000000000000000000000000000000000000000..b05324803e3359837832148ff2e5dad3ab1ba367 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/tasks/fast_text_to_unit.py @@ -0,0 +1,174 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import torch +import numpy as np +import logging +from pathlib import Path +from argparse import Namespace + +from fairseq.tasks import LegacyFairseqTask, register_task +from fairseq.data import Dictionary, encoders +from fairseq.data.audio.speech_to_text_joint_dataset import S2TJointDataConfig + +from speechlm.unit_generator import NonAutoregressiveUnitGenerator +from speechlm.data.text_to_unit_dataset import Text2UnitDatasetCreator + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=logging.INFO, +) +logger = logging.getLogger(__name__) + + +@register_task("fast_text_to_unit") +class FastTextToUnitTask(LegacyFairseqTask): + @staticmethod + def add_args(parser): + parser.add_argument("data", help="manifest root path") + parser.add_argument( + "--config-yaml", + type=str, + default="config.yaml", + help="Configuration YAML filename (under manifest root)", + ) + parser.add_argument( + "--max-source-positions", + default=2048, + type=int, + metavar="N", + help="max number of tokens in the source sequence", + ) + parser.add_argument( + "--max-target-positions", + default=1024, + type=int, + metavar="N", + help="max number of tokens in the target sequence", + ) + parser.add_argument("--n-frames-per-step", type=int, default=1) + parser.add_argument("--eos-prob-threshold", type=float, default=0.5) + parser.add_argument("--eval-inference", action="store_true") + parser.add_argument("--eval-tb-nsample", type=int, default=8) + parser.add_argument("--vocoder", type=str, default="griffin_lim") + parser.add_argument("--spec-bwd-max-iter", type=int, default=8) + + def __init__(self, args, src_dict, tgt_dict): + super().__init__(args) + self.src_dict = src_dict + self.tgt_dict = tgt_dict + self.data_cfg = S2TJointDataConfig(Path(args.data) / args.config_yaml) + self.speaker_to_id = 
self._get_speaker_to_id()
+
+    @classmethod
+    def setup_task(cls, args, **kwargs):
+        data_cfg = S2TJointDataConfig(Path(args.data) / args.config_yaml)
+        src_dict_path = Path(args.data) / data_cfg.src_vocab_filename
+        if not src_dict_path.is_file():
+            raise FileNotFoundError(f"Dict not found: {src_dict_path.as_posix()}")
+        src_dict = Dictionary.load(src_dict_path.as_posix())
+        logger.info(
+            f"Source dictionary size ({data_cfg.src_vocab_filename}): " f"{len(src_dict):,}"
+        )
+        tgt_dict_path = Path(args.data) / data_cfg.vocab_filename
+        if not tgt_dict_path.is_file():
+            raise FileNotFoundError(f"Dict not found: {tgt_dict_path.as_posix()}")
+        tgt_dict = Dictionary.load(tgt_dict_path.as_posix())
+        logger.info(
+            f"Target dictionary size ({data_cfg.vocab_filename}): " f"{len(tgt_dict):,}"
+        )
+
+        if getattr(args, "train_subset", None) is not None:
+            if not all(s.startswith("train") for s in args.train_subset.split(",")):
+                raise ValueError('Train splits should be named like "train*".')
+        return cls(args, src_dict, tgt_dict)
+
+    def load_dataset(self, split, epoch=1, combine=False, **kwargs):
+        is_train_split = split.startswith("train")
+        pre_tokenizer = self.build_tokenizer(self.args)
+        bpe_tokenizer = self.build_bpe(self.args)
+        self.datasets[split] = Text2UnitDatasetCreator.from_tsv(
+            self.args.data,
+            self.data_cfg,
+            split,
+            self.src_dict,
+            pre_tokenizer,
+            bpe_tokenizer,
+            is_train_split=is_train_split,
+            epoch=epoch,
+            seed=self.args.seed,
+            n_frames_per_step=self.args.n_frames_per_step,
+            speaker_to_id=self.speaker_to_id,
+        )
+
+    @property
+    def target_dictionary(self):
+        return self.tgt_dict
+
+    @property
+    def source_dictionary(self):
+        return self.src_dict
+
+    def max_positions(self):
+        return self.args.max_source_positions, self.args.max_target_positions
+
+    def _get_speaker_to_id(self):
+        speaker_to_id = None
+        speaker_set_filename = self.data_cfg.config.get("speaker_set_filename")
+        if speaker_set_filename is not None:
+            speaker_set_path = Path(self.args.data) / speaker_set_filename
+            with open(speaker_set_path) as f:
+                speaker_to_id = {r.strip(): i for i, r in enumerate(f)}
+        return speaker_to_id
+
+    @classmethod
+    def get_speaker_embeddings(cls, args):
+        # It will be used in the FastText2UnitModel; instead of an nn.Embedding on speaker ids,
+        # we default to using x-vectors extracted in advance.
+        # This is for varying the speaker information when generating units from text.
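+        # Three cases are handled below: a learned nn.Embedding lookup when integer speaker
+        # ids are available, a thin callable that reshapes pre-extracted x-vectors/i-vectors,
+        # or None when the model takes no speaker input.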
+ if args.speaker_to_id is not None: + embed_speaker = torch.nn.Embedding( + len(args.speaker_to_id), args.speaker_embed_dim + ) + elif args.speaker_embedding_type == "x-vector": + # return LayerNorm(args.speaker_embed_dim) + return lambda x: x.unsqueeze(1) + elif args.speaker_embedding_type == "i-vector": + # return LayerNorm(args.speaker_embed_dim) + return lambda x: x + else: + embed_speaker = None + return embed_speaker + + def build_model(self, cfg): + cfg.pitch_min = self.data_cfg.config["features"].get("pitch_min", None) + cfg.pitch_max = self.data_cfg.config["features"].get("pitch_max", None) + cfg.energy_min = self.data_cfg.config["features"].get("energy_min", None) + cfg.energy_max = self.data_cfg.config["features"].get("energy_max", None) + cfg.speaker_to_id = self.speaker_to_id + cfg.speaker_embedding_type = self.data_cfg.config.get("speaker_embedding_type", None) + model = super().build_model(cfg) + self.generator = None + if getattr(cfg, "eval_inference", False): + self.generator = self.build_generator([model], cfg) + return model + + def build_generator(self, models, cfg, vocoder=None, **unused): + model = models[0] + assert getattr(model, "NON_AUTOREGRESSIVE") is True + return NonAutoregressiveUnitGenerator(model, vocoder, self.data_cfg) + + + def build_tokenizer(self, args): + logger.info(f"pre-tokenizer: {self.data_cfg.pre_tokenizer}") + return encoders.build_tokenizer(Namespace(**self.data_cfg.pre_tokenizer)) + + def build_bpe(self, args): + logger.info(f"tokenizer: {self.data_cfg.bpe_tokenizer}") + return encoders.build_bpe(Namespace(**self.data_cfg.bpe_tokenizer)) diff --git a/SpeechT5/SpeechLM/speechlm/tasks/joint_sc2t_pretrain.py b/SpeechT5/SpeechLM/speechlm/tasks/joint_sc2t_pretrain.py new file mode 100644 index 0000000000000000000000000000000000000000..86af617670c5dcbb2aa3274057755ebb04b66547 --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/tasks/joint_sc2t_pretrain.py @@ -0,0 +1,976 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import os +import sys +from typing import Dict, List, Optional, Tuple +from pathlib import Path + +import numpy as np +from argparse import Namespace +from collections import OrderedDict + +import torch +from dataclasses import dataclass, field +from fairseq.data import ( + Dictionary, + encoders, + data_utils, + StripTokenDataset, + PrependTokenDataset, + AppendTokenDataset, + DenoisingDataset, + ConcatDataset, + FairseqDataset, + iterators, + ResamplingDataset, + MaskTokensDataset, + LanguagePairDataset, +) +from fairseq.data.audio.speech_to_text_joint_dataset import S2TJointDataConfig +from fairseq.data.shorten_dataset import maybe_shorten_dataset +# from fairseq.data.encoders.utils import get_whole_word_mask +from fairseq.dataclass.configs import FairseqDataclass +from fairseq.tasks import register_task +from fairseq.tasks.fairseq_task import FairseqTask +from fairseq.dataclass.constants import ChoiceEnum +from omegaconf import MISSING + +from speechlm.data.multimodal_corpus_dataset import MultiCorpusDataset +from 
speechlm.data.load_langpair_dataset import load_langpair_dataset +from speechlm.data.language_trible_dataset import LanguageTripleDataset, load_langtriple_dataset +from speechlm.data.hubert_dataset import HubertDataset + +logger = logging.getLogger(__name__) + +TOKENIZER_CHOICES = ChoiceEnum(["sentencepiece", "hubert_letters", "none"]) + +def _lang_token(lang: str): + return "".format(lang) + +def _lang_token_index(dic: Dictionary, lang: str): + """Return language token index.""" + idx = dic.index(_lang_token(lang)) + assert idx != dic.unk_index, "cannot find language token for lang {}".format(lang) + return idx + + +class LabelEncoder(object): + def __init__(self, dictionary: Dictionary) -> None: + self.dictionary = dictionary + + def __call__(self, label: str) -> List[str]: + return self.dictionary.encode_line( + label, append_eos=False, add_if_not_exist=False, + ) + + +### wrap the initial get_whole_word_mask which needs bpe_tokenizer, +### here we just assume words are splited by "|" or "" +def get_whole_word_mask(args, dictionary): + def is_beginning_of_word(i): + if i < dictionary.nspecial: + # special elements are always considered beginnings + return True + tok = dictionary[i] + if tok.startswith("madeupword"): + return True + elif tok in ["", "", "", "", "|", ""]: + return True + else: + return False + + mask_whole_words = torch.ByteTensor( + list(map(is_beginning_of_word, range(len(dictionary)))) + ) + return mask_whole_words + +def get_repeative_start(tokens): + """ + tokens: torch.Tensor with repeative tokens + """ + length = len(tokens) + rep_start_id = tokens[:-1] != tokens[1:] + return torch.cat([torch.tensor([True]), rep_start_id]) + +@dataclass +class TextPretrainingConfig(FairseqDataclass): + ### added for joint pretraining + text_data: Optional[str] = field( + default=None, + metadata={ + "help": "if set, path to text data directory", + }, + ) + seed: Optional[int] = field( + default=1, + metadata={ + "help": "for ordered_indices in MulticorpusDataset", + }, + ) + tokens_per_sample: Optional[int] = field( + default=512, + metadata={ + "help": "max number of total tokens over all segments per sample for dataset", + }, + ) + tokens_per_sample_tgt: Optional[int] = field( + default=512, + metadata={ + "help": "max number of total tokens over all segments per target sample for dataset", + }, + ) + sample_break_mode: Optional[str] = field( + default="eos", + metadata={ + "help": "mode for breaking sentence", + }, + ) + mask: Optional[float] = field( + default=0.3, + metadata={ + "help": "fraction of words/subwords that will be masked", + }, + ) + leave_unmasked_prob: float = field( + default=0.1, + metadata={"help": "probability that a masked token is unmasked"}, + ) + mask_random: Optional[float] = field( + default=0.1, + metadata={ + "help": "instead of using [MASK], use random token this often", + }, + ) + freq_weighted_replacement: bool = field( + default=False, + metadata={"help": "sample random replacement words based on word frequencies"}, + ) + mask_whole_words: bool = field( + default=True, + metadata={"help": "mask whole words; you may also want to set --bpe"}, + ) + mask_repeative_tokens: bool = field( + default=True, + metadata={"help": "mask repeative_tokens; if mask_whole_words=False"}, + ) + mask_multiple_length: int = field( + default=1, + metadata={"help": "repeat the mask indices multiple times"}, + ) + mask_stdev: float = field( + default=0.0, + metadata={"help": "stdev of the mask length"}, + ) + shorten_method: Optional[str] = field( + default="none", + 
metadata={ + "help": "if not none, shorten sequences that exceed tokens_per_sample", + "choices": "none/truncate/random_crop" + }, + ) + shorten_data_split_list: Optional[str] = field( + default="", + metadata={ + "help": "comma_separated list of dataset splits to apply shortening to, e.g., train,valid (default: all dataset splits)", + }, + ) + + ### below hypra-parameters is used in bart + insert: Optional[float] = field( + default=0.0, + metadata={ + "help": "insert this percentage of additional random tokens", + }, + ) + permute: Optional[float] = field( + default=0.0, + metadata={ + "help": "take this proportion of subwords and permute them", + }, + ) + rotate: Optional[float] = field( + default=0.0, + metadata={ + "help": "rotate this proportion of inputs", + }, + ) + poisson_lambda: Optional[float] = field( + default=3.5, + metadata={ + "help": "randomly shuffle sentences for this proportion of inputs", + }, + ) + permute_sentences: Optional[float] = field( + default=0.0, + metadata={ + "help": "shuffle this proportion of sentences in all inputs", + }, + ) + mask_length: Optional[str] = field( + default="span-poisson", + metadata={ + "help": "mask length to choose", + "choice": "subword/word/span-poisson" + }, + ) + replace_length: Optional[int] = field( + default=1, + metadata={ + "help": "when masking N tokens, replace with 0, 1, or N tokens (use -1 for N)", + }, + ) + shuffle_instance: Optional[bool] = field( + default=False, + metadata={"help": "shuffle instance"}, + ) + max_source_positions: Optional[int] = field( + default=1024, + metadata={"help": "max number of tokens in the source sequence"}, + ) + max_target_positions: Optional[int] = field( + default=1024, + metadata={"help": "max number of tokens in the target sequence"}, + ) + bpe: Optional[str] = field( + default="", + metadata={ + "help": "will wrapped by the text_data_config yaml", + }, + ) + data_config: Optional[str] = field( + default=None, + metadata={ + "help": "a config yaml specify the bpe model of text data", + }, + ) + text_maxtokens_ratio: Optional[float] = field( + default=1.0, + metadata={ + "help": "for text, max_tokens = max_tokens * text_maxtokens_ratio / 320 ", + }, + ) + prepend_tgt_lang_tag: bool = field( + default=False, + metadata={"help": "prepend tgt_lang_tag to replace "}, + ) + mask_text_ratio: Optional[float] = field( + default=0.0, + metadata={ + "help": "mask_text_ratio, for paired data", + }, + ) + truncate_mono_source: bool = field( + default=True, + metadata={"help": "truncate mono source-side examples that exceed max-positions"}, + ) + + +@dataclass +class JointPretrainingConfig(FairseqDataclass): + data: str = field( + default=MISSING, metadata={"help": "path to speech data directory"} + ) + fine_tuning: bool = field( + default=False, metadata={"help": "set to true if fine-tuning Hubert"} + ) + labels: List[str] = field( + default_factory=lambda: ["ltr"], + metadata={ + "help": ( + "extension of the label files to load, frame-level labels for" + " pre-training, and sequence-level label for fine-tuning" + ) + }, + ) + label_dir: Optional[str] = field( + default=None, + metadata={ + "help": "if set, looks for labels in this directory instead", + }, + ) + label_rate: int = field( + default=-1, + metadata={"help": "label frame rate. -1 for sequence label"}, + ) + sample_rate: int = field( + default=16_000, + metadata={ + "help": "target sample rate. 
audio files will be up/down " + "sampled to this rate" + }, + ) + normalize: bool = field( + default=False, + metadata={ + "help": "if set, normalizes input to have 0 mean and unit variance" + }, + ) + enable_padding: bool = field( + default=False, + metadata={"help": "pad shorter samples instead of cropping"}, + ) + max_keep_size: Optional[int] = field( + default=None, + metadata={"help": "exclude sample longer than this"}, + ) + max_sample_size: Optional[int] = field( + default=None, + metadata={"help": "max sample size to crop to for batching"}, + ) + min_sample_size: Optional[int] = field( + default=None, + metadata={"help": "min sample size to crop to for batching"}, + ) + single_target: Optional[bool] = field( + default=False, + metadata={ + "help": "if set, AddTargetDatasets outputs same keys " + "as AddTargetDataset" + }, + ) + random_crop: Optional[bool] = field( + default=True, + metadata={"help": "always crop from the beginning if false"}, + ) + pad_audio: Optional[bool] = field( + default=False, + metadata={"help": "pad audio to the longest one in the batch if true"}, + ) + store_labels: Optional[bool] = field( + default=True, + metadata={"help": "store spm labels in memory, should be true when fine-tune with bpe"}, + ) + add_decoder_target: bool = field( + default=False, + metadata={"help": "contral the model architecture, if set True, load reduced unit as target"}, + ) + split_modality_batch: bool = field( + default=False, + metadata={"help": "whether create all samples of different modalities in a batch"}, + ) + speech_tgt_lang: str = field( + default="", + metadata={"help": "prepend to prev_output_tokens to replace , only used for decoder"}, + ) + speech_sampling_alpha: float = field( + default=0.2, + metadata={ + "help": "Hyper-parameter alpha = 1/T for temperature-based speech resampling." + "(alpha = 1 for no resampling)" + }, + ) + text_sampling_alpha: float = field( + default=0.2, + metadata={ + "help": "Hyper-parameter alpha = 1/T for temperature-based text resampling." 
+ "(alpha = 1 for no resampling)" + }, + ) + hubert_tokenizer: Optional[TOKENIZER_CHOICES] = field( + default="none", + metadata={"help": "which tokenizer for processing text"}, + ) + sp_path: Optional[str] = field( + default=None, + metadata={"help": "sentencepiece model path if using bpe tokenizer"}, + ) + + text_cfg: TextPretrainingConfig = TextPretrainingConfig() + + +@register_task("joint_sc2t_pretraining", dataclass=JointPretrainingConfig) +class Jsc2tPretrainingTask(FairseqTask): + + cfg: JointPretrainingConfig + + def __init__( + self, + cfg: JointPretrainingConfig, + ) -> None: + super().__init__(cfg) + + logger.info(f"current directory is {os.getcwd()}") + logger.info(f"JSTPretrainingTask Config {cfg}") + + self.cfg = cfg + self.fine_tuning = cfg.fine_tuning + self.blank_symbol = "" + + self.state.add_factory("hubert_tokenizer", self.build_tokenizer) + if self.cfg.text_cfg.text_data is not None and os.path.exists(self.cfg.text_cfg.text_data): + self.state.add_factory("text_dictionary", self.load_text_dictionary) + self.state.add_factory("text_src_dictionary", self.load_text_src_dictionary) + if cfg.fine_tuning: + self.state.add_factory("target_dictionary", self.load_dictionaries) + else: + self.state.add_factory("dictionaries", self.load_dictionaries) + + if cfg.text_cfg.data_config is not None: + self.text_data_cfg = S2TJointDataConfig(Path(f"{cfg.text_cfg.text_data}/{cfg.text_cfg.data_config}")) + self.cfg.text_cfg.bpe = self.text_data_cfg.bpe_tokenizer["bpe"] + else: + self.text_data_cfg = None + + @property + def source_dictionary(self) -> Optional[Dictionary]: + return None + + @property + def target_dictionary(self) -> Optional[Dictionary]: + return self.state.target_dictionary + + @property + def dictionaries(self) -> List[Dictionary]: + return self.state.dictionaries + + @property + def text_dictionary(self) -> Optional[Dictionary]: + return self.state.text_dictionary + + @property + def text_src_dictionary(self) -> Optional[Dictionary]: + return self.state.text_src_dictionary + + @property + def hubert_tokenizer(self): + return self.state.hubert_tokenizer + + def load_dictionaries(self): + label_dir = self.cfg.data if self.cfg.label_dir is None else self.cfg.label_dir + dictionaries = [Dictionary.load(f"{label_dir}/dict.{label}.txt") for label in self.cfg.labels] + if not self.cfg.fine_tuning: + for dictionary in dictionaries: + dictionary.add_symbol("") + return dictionaries[0] if self.cfg.fine_tuning else dictionaries + + def load_text_dictionary(self): + tgt_dict_path = f"{self.cfg.text_cfg.text_data}/{self.text_data_cfg.vocab_filename if self.text_data_cfg is not None else 'dict.txt'}" + if not os.path.isfile(tgt_dict_path): + raise FileNotFoundError(f"Dict not found: {tgt_dict_path}") + text_dictionary = Dictionary.load(tgt_dict_path) + self.mask_idx = text_dictionary.add_symbol("") + return text_dictionary + + def load_text_src_dictionary(self): + src_dict_path = f"{self.cfg.text_cfg.text_data}/{self.text_data_cfg.src_vocab_filename if self.text_data_cfg is not None else 'dict.txt'}" + if not os.path.isfile(src_dict_path): + raise FileNotFoundError(f"Dict not found: {src_dict_path}") + src_text_dictionary = Dictionary.load(src_dict_path) + self.mask_idx = src_text_dictionary.add_symbol("") + return src_text_dictionary + + @classmethod + def setup_task( + cls, cfg: JointPretrainingConfig, **kwargs + ) -> "Jsc2tPretrainingTask": + return cls(cfg) + + def get_label_dir(self) -> str: + if self.cfg.label_dir is None: + return self.cfg.data + return self.cfg.label_dir + 
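+    # Note: text splits are named "<name>.<src>-<tgt>" (e.g. "train_text.phn-ltr" in the
+    # pre-training scripts); a three-field suffix "<src>-<ref>-<tgt>" selects the
+    # language-triple loader below instead of the language-pair one.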
+ def load_paired_dataset(self, text_split, truncate_source=False): + text_split, lp = text_split.rsplit('.', 1) # e.g. "libritext.ltr-ltr" + if len(lp.split("-")) == 2: + src, tgt = lp.split("-") + if src == tgt: + logger.warn(f"| trying to load monolingual dataset {text_split}.{lp}, please check your task is right.") + paired_dataset = self.load_char_bart_dataset(f"{text_split}.{lp}.{tgt}") + return paired_dataset + paired_dataset = load_langpair_dataset( + self.cfg.text_cfg.text_data, + text_split, + src, + self.text_src_dictionary, + tgt, + self.text_dictionary, + combine=True, + dataset_impl=None, + upsample_primary=1, + left_pad_source=False, + left_pad_target=False, + max_source_positions=self.cfg.text_cfg.tokens_per_sample, + max_target_positions=self.cfg.text_cfg.tokens_per_sample, + truncate_source=truncate_source, + prepend_bos=False, + load_alignments=False, + append_source_id=True if self.cfg.text_cfg.prepend_tgt_lang_tag else False, + lang_format="" if self.cfg.text_cfg.prepend_tgt_lang_tag else "[{}]", + input_feeding=self.cfg.add_decoder_target, + ) + if self.cfg.text_cfg.mask_text_ratio > 0: + # add mask + self.mask_idx = self.text_src_dictionary.index("") + mask_whole_words = None + if self.cfg.text_cfg.mask_whole_words: + mask_whole_words = get_whole_word_mask(self.cfg.text_cfg, self.text_src_dictionary) + elif self.cfg.text_cfg.mask_repeative_tokens: + mask_whole_words = get_repeative_start + + src_dataset, src_unmasked_dataset = MaskTokensDataset.apply_mask( + paired_dataset.src, + self.text_src_dictionary, + pad_idx=self.text_src_dictionary.pad(), + mask_idx=self.mask_idx, + seed=self.cfg.text_cfg.seed, + mask_prob=self.cfg.text_cfg.mask_text_ratio, + leave_unmasked_prob=self.cfg.text_cfg.leave_unmasked_prob, + random_token_prob=self.cfg.text_cfg.mask_random, + freq_weighted_replacement=self.cfg.text_cfg.freq_weighted_replacement, + mask_whole_words=mask_whole_words, + mask_multiple_length=self.cfg.text_cfg.mask_multiple_length, + mask_stdev=self.cfg.text_cfg.mask_stdev, + ) + tgt_dataset = paired_dataset.tgt if paired_dataset.tgt is not None else src_unmasked_dataset + paired_dataset = LanguageTripleDataset( + src_dataset, + src_dataset.sizes, + self.text_src_dictionary, + src_unmasked_dataset, + src_unmasked_dataset.sizes, + self.text_src_dictionary, + tgt_dataset, + tgt_dataset.sizes, + self.text_dictionary, + left_pad_source=False, + left_pad_target=False, + align_dataset=None, + eos=None, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + ) + else: + src, ref, tgt = lp.split("-") + paired_dataset = load_langtriple_dataset( + self.cfg.text_cfg.text_data, + text_split, + src, + self.text_src_dictionary, + ref, + self.dictionaries[-1], + tgt, + self.text_dictionary, + combine=True, + dataset_impl=None, + upsample_primary=1, + left_pad_source=False, + left_pad_target=False, + max_source_positions=self.cfg.text_cfg.tokens_per_sample, + max_target_positions=self.cfg.text_cfg.tokens_per_sample, + truncate_source=truncate_source, + prepend_bos=False, + load_alignments=False, + append_source_id=True if self.cfg.text_cfg.prepend_tgt_lang_tag else False, + lang_format="" if self.cfg.text_cfg.prepend_tgt_lang_tag else "[{}]", + ) + return paired_dataset + + def load_dataset(self, split: str, epoch=1, **kwargs) -> None: + """ + Create Wav dataset for audio, and Index dataset for phonemized text, + then concatenate them to by fairseq.data.multi_corpus_dataset.MultiCorpusDataset. 
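+
+        The split string packs several subsets together with '+', e.g.
+        "train_960+train_text.phn-ltr" (as used by the pre-training scripts): field 0 holds
+        the comma-separated speech subsets, field 1 the paired-text subsets, field 2 optional
+        mono-text subsets, and field 3 an optional supervised (labeled) speech subset.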
+ """ + speech_splits = split.split('+')[0].split(',') + ### 1st, create a speech dataset using STSpeechDataset (modified from HubertDataset) + dicts = [self.target_dictionary] if self.cfg.fine_tuning else self.dictionaries + pad_list = [dict.pad() for dict in dicts] + eos_list = [dict.eos() for dict in dicts] + procs = [LabelEncoder(dict) for dict in dicts] + if self.cfg.speech_tgt_lang != "": + tgt_lang_idx = _lang_token_index(dicts[0], self.cfg.speech_tgt_lang) + logger.info(f"Will prepend <{tgt_lang_idx}> at the beginning of prev_output_tokens to replace ") + else: + tgt_lang_idx = None + + + # hubert v1: pad_audio=True, random_crop=False; + speech_datasets = [] + for speech_split in speech_splits: + paths = [ + f"{self.get_label_dir()}/{speech_split}.{l}" for l in self.cfg.labels + ] + speech_datasets.append( + HubertDataset( + f"{self.cfg.data}/{speech_split}.tsv", + sample_rate=self.cfg.sample_rate, + label_paths=paths, + label_rates=self.cfg.label_rate, + pad_list=pad_list, + eos_list=eos_list, + label_processors=procs, + max_keep_sample_size=self.cfg.max_keep_size, + min_keep_sample_size=self.cfg.min_sample_size, + max_sample_size=self.cfg.max_sample_size, + pad_audio=self.cfg.pad_audio, + normalize=self.cfg.normalize, + store_labels=self.cfg.store_labels, + random_crop=self.cfg.random_crop, + single_target=self.cfg.single_target, + tgt_dict=dicts[0], + add_decoder_target=self.cfg.add_decoder_target, + fine_tuning=self.cfg.fine_tuning, + tgt_lang_idx=tgt_lang_idx, + tokenizer=self.hubert_tokenizer, + ) + ) + if len(speech_datasets) > 1: + speech_dataset = ConcatDataset(speech_datasets) + else: + speech_dataset = speech_datasets[0] + + has_text = len(split.split('+')) > 1 + if not has_text: + assert speech_dataset is not None + self.datasets[split] = speech_dataset + return + + ### 2nd, create paired/mono text datasets using Langpairdataset + if split.split('+')[1] != '': + paired_splits = [paired_split for paired_split in split.split('+')[1].split(',') if paired_split != ''] + paired_datasets = [self.load_paired_dataset(paired_split) for paired_split in paired_splits] + else: + paired_splits, paired_datasets = [], [] + + if len(split.split('+')) > 2 and split.split('+')[2] != '': + mono_splits = [mono_split for mono_split in split.split('+')[2].split(',') if mono_split != ''] + mono_datasets = [self.load_paired_dataset(mono_split, truncate_source=self.cfg.text_cfg.truncate_mono_source) for mono_split in mono_splits] + else: + mono_splits, mono_datasets = [], [] + + assert len(mono_datasets + paired_datasets) > 0, f"split {split} has no text! you should check out for that" + + ### 3rd, if provided, create a supervised dataset with labeled data + if len(split.split('+')) > 3 and split.split('+')[3] != '': + assert len(paired_splits) > 0, f"supervised dataset can not be loaded without text paired dataset!" 
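+            # the supervised split reuses the target-side label extension (e.g. "ltr") and the
+            # text dictionary of the first paired-text split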
+ tgt = paired_splits[0].rsplit('.', 1)[1].split("-")[1] + sup_split = split.split('+')[3] + + sup_dataset = HubertDataset( + f"{self.cfg.data}/{sup_split}.tsv", + sample_rate=self.cfg.sample_rate, + label_paths=[f"{self.get_label_dir()}/{sup_split}.{tgt}"], + label_rates=[-1], + pad_list=[self.text_dictionary.pad()], + eos_list=[self.text_dictionary.eos()], + label_processors=[LabelEncoder(self.text_dictionary)], + max_keep_sample_size=self.cfg.max_keep_size, + min_keep_sample_size=None, + max_sample_size=None, + pad_audio=True, + normalize=self.cfg.normalize, + store_labels=self.cfg.store_labels, + random_crop=False, + single_target=True, + tgt_dict=self.text_dictionary, + add_decoder_target=self.cfg.add_decoder_target, + fine_tuning=True, + tgt_lang_idx=None, + tokenizer=None, + ) + else: + sup_dataset = None + + ### 4th, compose a MultiCorpusDataset + dataset_dict, max_positions_dict, distributions, max_tokens_ratios = self.resample_multi_modality_dataset( + speech_dataset, sup_dataset, mono_datasets, paired_datasets, mono_splits, paired_splits, epoch=epoch, + ) + self.datasets[split] = MultiCorpusDataset( + dataset_dict, + max_positions=max_positions_dict, + distribution=distributions, + max_tokens_ratio=max_tokens_ratios, + seed=self.cfg.text_cfg.seed, + sort_indices=True, + ) + + + def max_positions(self) -> Tuple[int, int]: + return (sys.maxsize, sys.maxsize) + + def filter_indices_by_size( + self, indices: np.array, *args, **kwargs + ) -> np.array: + return indices + + def get_batch_iterator( + self, + dataset, + max_tokens=None, + max_sentences=None, + max_positions=None, + ignore_invalid_inputs=False, + required_batch_size_multiple=1, + seed=1, + num_shards=1, + shard_id=0, + num_workers=0, + epoch=1, + data_buffer_size=0, + disable_iterator_cache=False, + skip_remainder_batch=False, + grouped_shuffling=False, + update_epoch_batch_itr=False, + ): + """ + Get an iterator that yields batches of data from the given dataset. + Args: + dataset (~fairseq.data.FairseqDataset): dataset to batch + max_tokens (int, optional): max number of tokens in each batch + (default: None). + max_sentences (int, optional): max number of sentences in each + batch (default: None). + max_positions (optional): max sentence length supported by the + model (default: None). + ignore_invalid_inputs (bool, optional): don't raise Exception for + sentences that are too long (default: False). + required_batch_size_multiple (int, optional): require batch size to + be a multiple of N (default: 1). + seed (int, optional): seed for random number generator for + reproducibility (default: 1). + num_shards (int, optional): shard the data iterator into N + shards (default: 1). + shard_id (int, optional): which shard of the data iterator to + return (default: 0). + num_workers (int, optional): how many subprocesses to use for data + loading. 0 means the data will be loaded in the main process + (default: 0). + epoch (int, optional): the epoch to start the iterator from + (default: 1). + data_buffer_size (int, optional): number of batches to + preload (default: 0). + disable_iterator_cache (bool, optional): don't cache the + EpochBatchIterator (ignores `FairseqTask::can_reuse_epoch_itr`) + (default: False). + skip_remainder_batch (bool, optional): if set, discard the last + batch in each training epoch, as the last batch is often smaller than + local_batch_size * distributed_word_size (default: ``True``). 
+ grouped_shuffling (bool, optional): group batches with each groups + containing num_shards batches and shuffle groups. Reduces difference + between sequence lengths among workers for batches sorted by length. + update_epoch_batch_itr (bool optional): if true then donot use the cached + batch iterator for the epoch + + Returns: + ~fairseq.iterators.EpochBatchIterator: a batched iterator over the + given dataset split + """ + if self.fine_tuning or not isinstance(dataset, MultiCorpusDataset): + return super().get_batch_iterator( + dataset, + max_tokens=max_tokens, + max_sentences=max_sentences, + max_positions=max_positions, + ignore_invalid_inputs=ignore_invalid_inputs, + required_batch_size_multiple=required_batch_size_multiple, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + data_buffer_size=data_buffer_size, + disable_iterator_cache=disable_iterator_cache, + skip_remainder_batch=skip_remainder_batch, + grouped_shuffling=grouped_shuffling, + update_epoch_batch_itr=update_epoch_batch_itr, + ) + + can_reuse_epoch_itr = ( + not disable_iterator_cache + and not update_epoch_batch_itr + and self.can_reuse_epoch_itr(dataset) + ) + if can_reuse_epoch_itr and dataset in self.dataset_to_epoch_iter: + logger.debug("reusing EpochBatchIterator for epoch {}".format(epoch)) + return self.dataset_to_epoch_iter[dataset] + + assert isinstance(dataset, FairseqDataset) + + # initialize the dataset with the correct starting epoch + dataset.set_epoch(epoch) + + # get indices ordered by example size + with data_utils.numpy_seed(seed): + indices = dataset.ordered_indices() + + # filter examples that are too large + if max_positions is not None: + indices = self.filter_indices_by_size( + indices, dataset, max_positions, ignore_invalid_inputs + ) + + # create mini-batches with given size constraints + batch_sampler = dataset.get_batch_sampler( + indices, + num_shards, + seed, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + split_modality_batch=self.cfg.split_modality_batch, + ) + + # return a reusable, sharded iterator + epoch_iter = iterators.EpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_sampler=batch_sampler, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + buffer_size=data_buffer_size, + skip_remainder_batch=skip_remainder_batch, + disable_shuffling=True, + grouped_shuffling=grouped_shuffling, + ) + + if can_reuse_epoch_itr: + self.dataset_to_epoch_iter[dataset] = epoch_iter + + return epoch_iter + + @classmethod + def _get_size_ratios(cls, ids: List[str], sizes: List[int], alpha: float = 1.0): + """Size ratios for temperature-based sampling + (https://arxiv.org/abs/1907.05019)""" + _sizes = np.array(sizes) + prob = _sizes / _sizes.sum() + smoothed_prob = prob ** alpha + smoothed_prob = smoothed_prob / smoothed_prob.sum() + size_ratio = (smoothed_prob * _sizes.sum()) / _sizes + + o_str = str({_i: f"{prob[i]:.3f}" for i, _i in enumerate(ids)}) + logger.info(f"original sampling probability: {o_str}") + p_str = str({_i: f"{smoothed_prob[i]:.3f}" for i, _i in enumerate(ids)}) + logger.info(f"balanced sampling probability: {p_str}") + sr_str = str({_id: f"{size_ratio[i]:.3f}" for i, _id in enumerate(ids)}) + logger.info(f"balanced sampling size ratio: {sr_str}") + return size_ratio.tolist() + + def resample_multi_modality_dataset(self, speech_dataset, sup_dataset, mono_datasets, paired_datasets, 
mono_splits, paired_splits, epoch=1, train=True): + assert len(mono_datasets+paired_datasets) > 0, f"No text data loaded!" + + if len(mono_datasets) > 1 and self.cfg.text_sampling_alpha != 1.0: + size_ratios = self._get_size_ratios( + mono_splits, [len(s) for s in mono_datasets], alpha=self.cfg.text_sampling_alpha + ) + mono_datasets = [ + ResamplingDataset( + d, size_ratio=r, seed=0, epoch=epoch, replace=(r >= 1.0) + ) for d, r in zip(mono_datasets, size_ratios) + ] + + if len(paired_datasets) > 1 and self.cfg.text_sampling_alpha != 1.0: + size_ratios = self._get_size_ratios( + paired_splits, [len(s) for s in paired_datasets], alpha=self.cfg.text_sampling_alpha + ) + paired_datasets = [ + ResamplingDataset( + d, size_ratio=r, seed=0, epoch=epoch, replace=(r >= 1.0) + ) for d, r in zip(paired_datasets, size_ratios) + ] + + dataset_list = [speech_dataset, sup_dataset] + for datasets in [mono_datasets, paired_datasets]: + if len(datasets) > 1: + dataset_list.append(ConcatDataset(datasets)) + elif len(datasets) == 1: + dataset_list.append(datasets[0]) + else: + dataset_list.append(None) + + ### match speech/text datasets according to modality + dataset_dict = OrderedDict((name, d) for name, d in zip(["speech", "speech_sup", "text_mono", "text_paired"], dataset_list) if d is not None) + max_positions_dict = { + "speech": None, + "speech_sup": None, + "text_mono": (self.cfg.text_cfg.tokens_per_sample, self.cfg.text_cfg.tokens_per_sample), + "text_paired": (self.cfg.text_cfg.tokens_per_sample, self.cfg.text_cfg.tokens_per_sample), + } + max_positions_dict = OrderedDict((name, max_positions_dict[name]) for name in dataset_dict.keys()) + max_tokens_ratios_dict = { + "speech": 1.0, + "speech_sup": 1.0, + "text_mono": 1.0 / 320 / self.cfg.text_cfg.text_maxtokens_ratio, + "text_paired": 1.0 / 320 / self.cfg.text_cfg.text_maxtokens_ratio, + } + max_tokens_ratios = [max_tokens_ratios_dict[name] for name in dataset_dict.keys()] + dataset_lens = np.array([len(dataset) for dataset in dataset_dict.values()]) + dataset_avg_sample_lens = np.array([ + sum([dataset.num_tokens(i) for i in np.random.randint(low=0, high=len(dataset), size=10000)]) / 10000.0 + for dataset in dataset_dict.values() + ]) + + if not "speech" in dataset_dict: + distributions = [l / sum(dataset_lens) for l in dataset_lens] + else: + ## we just keep the batches of speech and non-speech the same, expand_coef is to ensure speech batches is less than others + first_ratio = dataset_lens[0] / sum(dataset_lens) + expand_coef = 1.8 if sup_dataset is None else 1.1 * sum(dataset_lens[0:2]) / dataset_lens[0] + distributions = [expand_coef * max_tokens_ratios[i] * dataset_avg_sample_lens[0] / l for (i, l) in enumerate(dataset_avg_sample_lens)] + distributions[0] = 1.0 + if sup_dataset is not None: + distributions[1] = dataset_lens[1] / dataset_lens[0] + distributions = [first_ratio * d for d in distributions] + + logging.info(f"Number samples of datasets is {dataset_lens}") + logging.info(f"Avg sample length of datasets is {dataset_avg_sample_lens}") + logging.info(f"Sampling distributions is {distributions}") + logging.info(f"Maxtokens ratio is {max_tokens_ratios}") + return dataset_dict, max_positions_dict, distributions, max_tokens_ratios + + def build_tokenizer(self, cfg=None): + logger.info(f"tokenizer: {self.cfg.hubert_tokenizer}") + if self.cfg.hubert_tokenizer != "none": + return encoders.build_bpe(Namespace(**{"bpe": self.cfg.hubert_tokenizer, "sentencepiece_model": self.cfg.sp_path})) + else: + return None + + def 
load_char_bart_dataset(self, split): + mono_dataset = data_utils.load_indexed_dataset( + f"{self.cfg.text_cfg.text_data}/{split}", + self.text_dictionary, + ) + mono_dataset = StripTokenDataset(mono_dataset, self.text_dictionary.eos()) + mono_dataset = maybe_shorten_dataset( + mono_dataset, + split, + self.cfg.text_cfg.shorten_data_split_list, + self.cfg.text_cfg.shorten_method, + self.cfg.text_cfg.tokens_per_sample - 2, + self.cfg.text_cfg.seed, + ) + logger.info("loaded {} samples from: {}".format(len(mono_dataset), mono_dataset)) + ### prepend bos and eos to dataset + mono_dataset = PrependTokenDataset(mono_dataset, self.text_dictionary.bos()) + mono_dataset = AppendTokenDataset(mono_dataset, self.text_dictionary.eos()) + mask_whole_words = ( + get_whole_word_mask(None, self.text_dictionary) + if self.cfg.text_cfg.mask_whole_words + else None + ) + lang=self.cfg.speech_tgt_lang + mono_dataset = DenoisingDataset( + mono_dataset, + mono_dataset.sizes, + self.text_dictionary, + self.mask_idx, + mask_whole_words, + shuffle=self.cfg.text_cfg.shuffle_instance, + seed=self.cfg.text_cfg.seed, + args=self.cfg.text_cfg, + tgt_lang_idx=_lang_token_index(self.text_dictionary, lang) if self.cfg.text_cfg.prepend_tgt_lang_tag else None, + ) + + return mono_dataset diff --git a/SpeechT5/SpeechLM/speechlm/unit_generator.py b/SpeechT5/SpeechLM/speechlm/unit_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..93f9b98b473f099a39191b630b181ade2231857d --- /dev/null +++ b/SpeechT5/SpeechLM/speechlm/unit_generator.py @@ -0,0 +1,66 @@ +# ---------------------------------------------------------------------------- +# SpeechLM: Enhanced Speech Pre-Training with Unpaired Textual Data (https://arxiv.org/abs/2209.15329) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +""" +Modified form: https://github.com/facebookresearch/fairseq/blob/272c4c5197250997148fb12c0db6306035f166a4/fairseq/sequence_generator.py +""" + +import torch +import numpy as np + +from fairseq.data.audio.speech_to_text_dataset import S2TDataConfig +from fairseq.speech_generator import SpeechGenerator + +class NonAutoregressiveUnitGenerator(SpeechGenerator): + @torch.no_grad() + def generate(self, model, sample, has_targ=False, **kwargs): + model.eval() + + bsz, max_src_len = sample["net_input"]["src_tokens"].size() + n_frames_per_step = model.encoder.n_frames_per_step + out_dim = model.encoder.out_dim + raw_dim = out_dim // n_frames_per_step + + logit, logit_post, out_lens, log_dur_out, _, _ = model( + src_tokens=sample["net_input"]["src_tokens"], + src_lengths=sample["net_input"]["src_lengths"], + speaker=sample["speaker"], + durations=sample["durations"], + pitches=sample["pitches"], + energies=sample["energies"], + ) + if logit_post is not None: + logit = logit_post + + logit = logit.view(bsz, -1, raw_dim) + pred = logit.argmax(dim=-1) + + ## get duration prediction + src_tokens = sample["net_input"]["src_tokens"] + src_lengths = sample["net_input"]["src_lengths"] + padding_mask = src_tokens.eq(model.encoder.padding_idx) + d_factor = 1.0 ## set by model + dur_out = torch.clamp( + torch.round((torch.exp(log_dur_out) - 1) * d_factor).long(), min=0 + ) + dur_out.masked_fill_(padding_mask, 0) + x = 
src_tokens.unsqueeze(-1) + x, src_out_lens = model.encoder.var_adaptor.length_regulator(x, dur_out) + fa_src_tokens = x.view(bsz, -1) + + finalized = [ + { + "unit": pred[b, :l], + "fa_src": fa_src_tokens[b, :l], + "duration": dur_out[b, :L], + } + for b, l, L in zip(range(bsz), out_lens, src_lengths) + ] + + return finalized diff --git a/SpeechT5/SpeechT5/README.md b/SpeechT5/SpeechT5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d4b0b2bf4c65aad20e47c2f6a7d3626d5bbe1c99 --- /dev/null +++ b/SpeechT5/SpeechT5/README.md @@ -0,0 +1,706 @@ +# SpeechT5 + + + + [**SpeechT5**](https://arxiv.org/abs/2110.07205): **Unified-Modal Encoder-Decoder Pre-training for Spoken Language Processing** + +Official PyTorch implementation and pretrained models of SpeechT5 + +- Oct 2021: release preprint in [arXiv](https://arxiv.org/abs/2110.07205) +- Feb 2022: accepted by [ACL 2022](https://www.2022.aclweb.org/) + + +## Pre-Trained Models + +| Model | Pre-training Dataset | Fine-tuning Dataset | Model | +| :------: | :----------------------------------------------: | :-----------------: | :-----: | +| SpeechT5 Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [LibriSpeech LM Dataset](https://www.openslr.org/11/) | - | [HuggingFace](https://huggingface.co/ajyy/SpeechT5/resolve/main/speecht5_base.pt)
[Google Drive](https://drive.google.com/file/d/1Sq00uZ1pw6Z4OUaqhOWzQEJxIVWgAO5U/view?usp=sharing) | +| SpeechT5 Base | [960 hrs LibriSpeech](http://www.openslr.org/12) + [LibriSpeech LM Dataset](https://www.openslr.org/11/) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [HuggingFace](https://huggingface.co/ajyy/SpeechT5/resolve/main/speecht5_base_asr.pt)
[Google Drive](https://drive.google.com/file/d/1qLKJ81JPWOGf1MHfjSmgtZyqqTqgI6kT/view?usp=sharing) | +| SpeechT5 Large | [60k hrs Libri-Light](https://github.com/facebookresearch/libri-light) + [LibriSpeech LM Dataset](https://www.openslr.org/11/) | - | [Google Drive](https://drive.google.com/file/d/1M79b1jetSPOVxWVMIX-y0URvDjNskZKp/view?usp=sharing) | + +## Language Model and Vocabulary +| Model | Dataset | Model | Vocabulary | SPM Model | +| :------: | :------: | :---: | :--------: | :-------: | +| LM | [LibriSpeech LM Dataset](https://www.openslr.org/11/) | [LM Model](https://drive.google.com/uc?export=download&id=1y0TGnKAMKUW5C8l8yrvGjh9RRZETPdv7) | [Vocabulary](https://drive.google.com/uc?export=download&id=19hcQ58RHZ6CssxF8Qp6yEF1NW_AXxObK) | [SPM Model](https://drive.google.com/uc?export=download&id=1wClgQjXXoU2lmpbaEa1v2SqMbg7cAutq) | + + +## Setup +``` +git submodule update --init SpeechT5/fairseq +cd SpeechT5/ +pip install --editable fairseq/ +pip install espnet +``` + + + +## Load Pre-Trained Models + +```python +import torch +from speecht5.tasks.speecht5 import SpeechT5Task +from speecht5.models.speecht5 import T5TransformerModel + +checkpoint = torch.load('/path/to/speecht5_checkpoint') + +checkpoint['cfg']['task'].t5_task = 'pretrain' +checkpoint['cfg']['task'].hubert_label_dir = "/path/to/hubert_label" +checkpoint['cfg']['task'].data = "/path/to/tsv_file" + +task = SpeechT5Task.setup_task(checkpoint['cfg']['task']) +model = T5TransformerModel.build_model(checkpoint['cfg']['model'], task) +model.load_state_dict(checkpoint['model']) +``` + +## Data Preparation + +### Speech data and S2T Data +Please follow the steps for preparing wav2vec 2.0 manifest in [here](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec#prepare-training-data-manifest) and preparing HuBERT label in [here](https://github.com/facebookresearch/fairseq/tree/main/examples/hubert/simple_kmeans). + +We add a third column for the speaker embedding, which is provided in [here](https://drive.google.com/uc?export=download&id=16QOUURZBrW7-GYbVG_gXt3mTMlZmQoH0). +It includes the speaker embeddings for 960hr training data and dev-other data of LibriSpeech. + +We also provide example manifests for your reference in [here](https://drive.google.com/drive/folders/1Ja08XjOHe6vP8lZtLVrJM8173aPQCR_y?usp=sharing). + +### Text Data +Please use [fairseq-preprocess](https://fairseq.readthedocs.io/en/latest/command_line_tools.html#fairseq-preprocess) to generate the index and bin files of the text data. Note that we use sentencepiece to pre-process the text, so please refer to [here](https://github.com/microsoft/SpeechT5/tree/main/SpeechT5#language-model-and-vocabulary) to download the SPM model and dictionary for preparing text data. This means you firstly need to use the SPM model to process the text and then use [fairseq-preprocess](https://fairseq.readthedocs.io/en/latest/command_line_tools.html#fairseq-preprocess) with the provided dictionary to get the index and bin files. 
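+
+For reference, here is a minimal sketch of this two-step text preparation. It assumes the `sentencepiece` Python package, the SPM model and dictionary downloaded from the links above, and placeholder file names (`train.txt`, `train.spm.txt`, `dict.txt`, `text_bin/`):
+
+```python
+import sentencepiece as spm
+
+# Step 1: tokenize the raw text with the downloaded SPM model (placeholder path).
+sp = spm.SentencePieceProcessor(model_file="spm.model")
+with open("train.txt") as fin, open("train.spm.txt", "w") as fout:
+    for line in fin:
+        pieces = sp.encode(line.strip(), out_type=str)
+        fout.write(" ".join(pieces) + "\n")
+
+# Step 2: binarize with fairseq-preprocess using the provided dictionary, e.g.
+#   fairseq-preprocess --only-source --trainpref train.spm.txt \
+#       --srcdict dict.txt --destdir text_bin --workers 8
+```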
+ +## Pre-Training + +### 960hr LibriSpeech + LibriSpeech-LM + +``` +DATA_ROOT= +SAVE_DIR= +LABEL_DIR= +TRAIN_SET="speech_train|text_train" +VALID_SET="speech_valid|text_valid" + + +fairseq-train ${DATA_ROOT} \ + --save-dir ${SAVE_DIR} \ + --tensorboard-logdir ${SAVE_DIR} \ + --train-subset ${TRAIN_SET} \ + --valid-subset ${VALID_SET} \ + --hubert-label-dir ${LABEL_DIR} \ + --distributed-world-size 32 \ + --distributed-port 0 \ + --ddp-backend legacy_ddp \ + --user-dir SpeechT5/speecht5 \ + --log-format json \ + --seed 1337 \ + --fp16 \ + \ + --task speecht5 \ + --t5-task pretrain \ + --label-rates 50 \ + --sample-rate 16000 \ + --random-crop \ + \ + --num-workers 0 \ + --max-tokens 1400000 \ + --max-speech-sample-size 250000 \ + --update-freq 2 \ + --batch-ratio "[1,0.0086]" \ + \ + --criterion speecht5 \ + --optimizer adam \ + --reset-optimizer \ + --adam-betas "(0.9, 0.98)" \ + --adam-eps 1e-06 \ + --weight-decay 0.01 \ + --power 1 \ + --clip-norm 5.0 \ + --lr 0.0002 \ + --lr-scheduler polynomial_decay \ + \ + --max-update 800000 \ + --warmup-updates 64000 \ + --total-num-update 800000 \ + --save-interval-updates 3000 \ + --skip-invalid-size-inputs-valid-test \ + --required-batch-size-multiple 1 \ + \ + --arch t5_transformer_base \ + --share-input-output-embed \ + --find-unused-parameters \ + --bert-init \ + --relative-position-embedding \ + --use-codebook \ + --codebook-prob 0.1 \ + --loss-weights="[10,0.1]" \ + --max-text-positions 600 \ +``` + +## Finetune + +### ASR + +The fine-tuned ASR model can be used directly using Hugging Face Transformers. The checkpoint is available at [hf.co/microsoft/speecht5_asr](https://huggingface.co/microsoft/speecht5_asr). An interactive demo is [available here](https://huggingface.co/spaces/Matthijs/speecht5-asr-demo). + +#### Training + +``` +DATA_ROOT= +SAVE_DIR= +TRAIN_SET= +VALID_SET= +LABEL_DIR= +BPE_TOKENIZER= +USER_DIR= +PT_CHECKPOINT_PATH= + +mkdir -p ${SAVE_DIR} +fairseq-train ${DATA_ROOT} \ + --save-dir ${SAVE_DIR} \ + --tensorboard-logdir ${SAVE_DIR} \ + --train-subset ${TRAIN_SET} \ + --valid-subset ${VALID_SET} \ + --hubert-label-dir ${LABEL_DIR} \ + --distributed-world-size 8 \ + --distributed-port 0 \ + --ddp-backend legacy_ddp \ + --user-dir ${USER_DIR} \ + --log-format json \ + --seed 1 \ + --fp16 \ + \ + --task speecht5 \ + --t5-task s2t \ + --sample-rate 16000 \ + --num-workers 0 \ + --max-tokens 1600000 \ + --update-freq 2 \ + --bpe-tokenizer ${BPE_TOKENIZER} \ + \ + --criterion speecht5 \ + --report-accuracy \ + --zero-infinity \ + --ce-weight 0.5 \ + --ctc-weight 0.5 \ + --sentence-avg \ + \ + --optimizer adam \ + --adam-betas "(0.9, 0.98)" \ + --adam-eps 1e-08 \ + --weight-decay 0.1 \ + --clip-norm 25.0 \ + --lr 0.00006 \ + --lr-scheduler tri_stage \ + --phase-ratio "[0.1, 0.4, 0.5]" \ + --final-lr-scale 0.05 \ + \ + --max-update 80000 \ + --max-text-positions 600 \ + --required-batch-size-multiple 1 \ + --save-interval-updates 3000 \ + --skip-invalid-size-inputs-valid-test \ + \ + --arch t5_transformer_base_asr \ + --share-input-output-embed \ + --find-unused-parameters \ + --bert-init \ + --relative-position-embedding \ + --freeze-encoder-updates 13000 \ + \ + --keep-last-epochs 10 \ + --feature-grad-mult 1.0 \ + --best-checkpoint-metric s2t_accuracy \ + --maximize-best-checkpoint-metric \ + --finetune-from-model ${PT_CHECKPOINT_PATH} +``` + +#### Inference +Note that joint CTC/Decoder inference is only supported when batch size is 1. 
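+
+In addition to the fairseq-generate recipe below, the Hugging Face checkpoint mentioned above can be used for a quick transcription check. A minimal sketch, assuming a recent `transformers` release with the SpeechT5 classes, the `soundfile` package, and a placeholder 16 kHz mono file `example.wav`:
+
+```python
+import torch
+import soundfile as sf
+from transformers import SpeechT5Processor, SpeechT5ForSpeechToText
+
+processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_asr")
+model = SpeechT5ForSpeechToText.from_pretrained("microsoft/speecht5_asr")
+
+# Load a 16 kHz mono waveform (placeholder path).
+waveform, sample_rate = sf.read("example.wav")
+inputs = processor(audio=waveform, sampling_rate=sample_rate, return_tensors="pt")
+
+with torch.no_grad():
+    predicted_ids = model.generate(**inputs, max_length=100)
+print(processor.batch_decode(predicted_ids, skip_special_tokens=True)[0])
+```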
+ +``` +CHECKPOINT_PATH= +DATA_ROOT= +SUBSET= +BPE_TOKENIZER= +LABEL_DIR= +USER_DIR= +BEAM= +MAX_TOKENS= +CTC_WEIGHT= +LM_WEIGHT= +LM_PATH= + +fairseq-generate ${DATA_ROOT} \ + --gen-subset ${SUBSET} \ + --bpe-tokenizer ${BPE_TOKENIZER} \ + --user-dir ${USER_DIR} \ + --task speecht5 \ + --t5-task s2t \ + --path ${CHECKPOINT_PATH} \ + --hubert-label-dir ${LABEL_DIR} \ + --ctc-weight ${CTC_WEIGHT} \ + --lm-weight ${LM_WEIGHT} \ + --lm-path ${LM_PATH} \ + --max-tokens ${MAX_TOKENS} \ + --beam ${BEAM} \ + --scoring wer \ + --max-len-a 0 \ + --max-len-b 620 \ + --sample-rate 16000 +``` + +### TTS + +The manifest and pre-trained vocoder can be found in [huggingface](https://huggingface.co/mechanicalsea/speecht5-tts), which may be helpful to reproduce the results of SpeechT5 TTS model. + +We also provide re-implementation of TTS fine-tuned model [speecht5_tts.pt](https://huggingface.co/mechanicalsea/speecht5-tts/blob/main/speecht5_tts.pt), but with a smaller batch size or max updates, which can be helpful. + +This fine-tuned TTS model can also be used directly using Hugging Face Transformers. The checkpoint is available at [hf.co/microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts). An interactive demo is [available here](https://huggingface.co/spaces/Matthijs/speecht5-tts-demo). Also see [this Colab notebook](https://colab.research.google.com/drive/1i7I5pzBcU3WDFarDnzweIj4-sVVoIUFJ) on how to fine-tune SpeechT5 for TTS using Hugging Face. + +#### Training + +``` +DATA_ROOT= +SAVE_DIR= +TRAIN_SET= +VALID_SET= +LABEL_DIR= +BPE_TOKENIZER= +USER_DIR= +PT_CHECKPOINT_PATH= + +fairseq-train ${DATA_ROOT} \ + --save-dir ${SAVE_DIR} \ + --tensorboard-logdir ${SAVE_DIR} \ + --train-subset ${TRAIN_SET} \ + --valid-subset ${VALID_SET} \ + --hubert-label-dir ${LABEL_DIR} \ + --distributed-world-size 8 \ + --distributed-port 0 \ + --ddp-backend legacy_ddp \ + --user-dir ${USER_DIR} \ + --log-format json \ + --seed 1 \ + --fp16 \ + \ + --task speecht5 \ + --t5-task t2s \ + --sample-rate 16000 \ + --num-workers 4 \ + --max-tokens 3200000 \ + --update-freq 1 \ + --bpe-tokenizer ${BPE_TOKENIZER} \ + --max-tokens-valid 3200000 \ + \ + --criterion speecht5 \ + --use-guided-attn-loss \ + --report-accuracy \ + --sentence-avg \ + \ + --optimizer adam \ + --adam-betas "(0.9, 0.98)" \ + --dropout 0.15 \ + --activation-dropout 0.15 \ + --attention-dropout 0.15 \ + --encoder-layerdrop 0.0 \ + --decoder-layerdrop 0.0 \ + --weight-decay 0.0 \ + --clip-norm 25.0 \ + --lr 0.0001 \ + --lr-scheduler inverse_sqrt \ + --warmup-updates 10000 \ + --feature-grad-mult 1.0 \ + \ + --max-update 120000 \ + --max-text-positions 600 \ + --min-speech-sample-size 1056 \ + --max-speech-sample-size 480256 \ + --max-speech-positions 1876 \ + --required-batch-size-multiple 1 \ + --skip-invalid-size-inputs-valid-test \ + --keep-last-epochs 10 \ + --validate-after-updates 20000 \ + --validate-interval 50 \ + --log-interval 10 \ + \ + --arch t5_transformer_base_asr \ + --share-input-output-embed \ + --find-unused-parameters \ + --bert-init \ + --relative-position-embedding \ + --freeze-encoder-updates 20000 \ + \ + --finetune-from-model ${PT_CHECKPOINT_PATH} +``` + +#### Inference + +Generating speech is available only if batch size is 1. 
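+
+As an alternative to the fairseq recipe below, speech can also be synthesized directly from the Hugging Face checkpoint mentioned above. A minimal sketch, assuming a recent `transformers` release with the SpeechT5 classes, the `microsoft/speecht5_hifigan` vocoder, and the `Matthijs/cmu-arctic-xvectors` speaker embeddings used in the Hugging Face examples:
+
+```python
+import torch
+import soundfile as sf
+from datasets import load_dataset
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+
+processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+
+# x-vector speaker embedding; index 7306 is just an example speaker.
+embeddings = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+speaker_embeddings = torch.tensor(embeddings[7306]["xvector"]).unsqueeze(0)
+
+inputs = processor(text="SpeechT5 is a unified-modal encoder-decoder model.", return_tensors="pt")
+speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+sf.write("speecht5_tts_sample.wav", speech.numpy(), samplerate=16000)
+```
+
+The x-vector here only controls speaker identity; any x-vector-style speaker embedding accepted by the model can be substituted.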
+ +``` +SPEECHT5_CODE_DIR= +CHECKPOINT_PATH= +DATA_ROOT= +SUBSET= +BPE_TOKENIZER= +LABEL_DIR= +USER_DIR= +RESULTS_PATH= + +python3 ${SPEECHT5_CODE_DIR}/SpeechT5/scripts/generate_speech.py ${DATA_ROOT} \ + --gen-subset ${SUBSET} \ + --bpe-tokenizer ${BPE_TOKENIZER} \ + --user-dir ${USER_DIR} \ + --task speecht5 \ + --t5-task t2s \ + --path ${CHECKPOINT_PATH} \ + --hubert-label-dir ${LABEL_DIR} \ + --batch-size 1 \ + --results-path ${RESULTS_PATH} \ + --sample-rate 16000 +``` + +### ST + +Here we follow [fairseq/speech_to_text/mustc](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_text/docs/mustc_example.md#data-preparation) to generate vocabulary, which is different from the pre-trained models. So we randomly initilize the embedding table of the pre-trained models during fine-tuning. + +#### Training + +``` +DATA_ROOT= +SAVE_DIR= +TRAIN_SET= +VALID_SET= +LABEL_DIR= +BPE_TOKENIZER= +USER_DIR= +PT_CHECKPOINT_PATH= + +fairseq-train ${DATA_ROOT} \ + --save-dir ${SAVE_DIR} \ + --tensorboard-logdir ${SAVE_DIR} \ + --train-subset ${TRAIN_SET} \ + --valid-subset ${VALID_SET} \ + --hubert-label-dir ${LABEL_DIR} \ + --distributed-world-size 8 \ + --distributed-port 0 \ + --ddp-backend legacy_ddp \ + --user-dir ${USER_DIR} \ + --log-format json \ + --seed 1 \ + --fp16 \ + \ + --task speecht5 \ + --t5-task s2t \ + --sample-rate 16000 \ + --num-workers 6 \ + --max-tokens 480256 \ + --update-freq 4 \ + --bpe-tokenizer ${BPE_TOKENIZER} \ + --max-tokens-valid 3200000 \ + \ + --criterion speecht5 \ + --label-smoothing 0.1 \ + --report-accuracy \ + --sentence-avg \ + \ + --optimizer adam \ + --adam-betas "(0.9, 0.98)" \ + --weight-decay 0.0 \ + --clip-norm 10.0 \ + --lr 0.0002 \ + --lr-scheduler inverse_sqrt \ + --warmup-updates 25000 \ + --feature-grad-mult 1.0 \ + \ + --max-update 80000 \ + --max-text-positions 600 \ + --min-speech-sample-size 1056 \ + --max-speech-sample-size 480256 \ + --max-speech-positions 1876 \ + --required-batch-size-multiple 1 \ + --skip-invalid-size-inputs-valid-test \ + --keep-last-epochs 10 \ + \ + --arch t5_transformer_base_asr \ + --share-input-output-embed \ + --find-unused-parameters \ + --bert-init \ + --relative-position-embedding \ + --freeze-encoder-updates 0 \ + --mask-prob 0.5 \ + --mask-channel-prob 0.5 \ + \ + --finetune-from-model ${PT_CHECKPOINT_PATH} +``` + +#### Inference + +``` +FAIRSEQ_DIR= +CHECKPOINT_PATH= +DATA_ROOT= +BPE_TOKENIZER= +LABEL_DIR= +USER_DIR= +MAX_TOKENS= + +python3 ${FAIRSEQ_DIR}/scripts/average_checkpoints.py \ + --inputs ${CHECKPOINT_PATH} \ + --num-epoch-checkpoints 10 \ + --output ${CHECKPOINT_PATH}/avg_last_10_checkpoint.pt + +fairseq-generate ${DATA_ROOT} \ + --gen-subset tst-COMMON \ + --bpe-tokenizer ${BPE_TOKENIZER} \ + --user-dir ${USER_DIR} \ + --task speecht5 \ + --t5-task s2t \ + --path ${CHECKPOINT_PATH}/avg_last_10_checkpoint.pt \ + --hubert-label-dir ${LABEL_DIR} \ + --max-tokens ${MAX_TOKENS} \ + --min-speech-sample-size 1056 \ + --beam 5 \ + --scoring sacrebleu \ + --max-len-a 0 \ + --max-len-b 620 \ + --sample-rate 16000 +``` + +### VC + +The manifest and pre-trained vocoder can be found in [huggingface](https://huggingface.co/mechanicalsea/speecht5-vc), which may be helpful to reproduce the results of SpeechT5 VC model. + +We also provide re-implementation of VC fine-tuned model [speecht5_vc.pt](https://huggingface.co/mechanicalsea/speecht5-vc/blob/main/speecht5_vc.pt), but with a smaller batch size or max updates, which can be helpful. 
+ +This fine-tuned VC model can also be used directly using Hugging Face Transformers. The checkpoint is available at [hf.co/microsoft/speecht5_vc](https://huggingface.co/microsoft/speecht5_vc). An interactive demo is [available here](https://huggingface.co/spaces/Matthijs/speecht5-vc-demo). + +#### Training + + +``` +DATA_ROOT= +SAVE_DIR= +TRAIN_SET= +VALID_SET= +LABEL_DIR= +BPE_TOKENIZER= +USER_DIR= +PT_CHECKPOINT_PATH= + +fairseq-train ${DATA_ROOT} \ + --save-dir ${SAVE_DIR} \ + --tensorboard-logdir ${SAVE_DIR} \ + --train-subset ${TRAIN_SET} \ + --valid-subset ${VALID_SET} \ + --hubert-label-dir ${LABEL_DIR} \ + --distributed-world-size 8 \ + --distributed-port 0 \ + --ddp-backend legacy_ddp \ + --user-dir ${USER_DIR} \ + --log-format json \ + --seed 1 \ + --fp16 \ + \ + --task speecht5 \ + --t5-task s2s \ + --sample-rate 16000 \ + --num-workers 4 \ + --max-tokens 1280000 \ + --update-freq 3 \ + --max-tokens-valid 1280000 \ + \ + --criterion speecht5 \ + --use-guided-attn-loss \ + --report-accuracy \ + --sentence-avg \ + \ + --optimizer adam \ + --dropout 0.2 \ + --activation-dropout 0.2 \ + --attention-dropout 0.2 \ + --encoder-layerdrop 0.05 \ + --decoder-layerdrop 0.0 \ + --clip-norm 1.0 \ + --lr 0.0001 \ + --lr-scheduler inverse_sqrt \ + --warmup-updates 6000 \ + --feature-grad-mult 1.0 \ + \ + --max-update 60000 \ + --max-text-positions 600 \ + --min-speech-sample-size 1056 \ + --max-speech-sample-size 480256 \ + --max-speech-positions 1876 \ + --required-batch-size-multiple 1 \ + --skip-invalid-size-inputs-valid-test \ + --keep-last-epochs 10 \ + --save-interval-updates 10000 \ + --disable-validation \ + --log-interval 10 \ + \ + --arch t5_transformer_base_asr \ + --share-input-output-embed \ + --find-unused-parameters \ + --bert-init \ + --relative-position-embedding \ + --mask-prob 0.0 \ + --mask-channel-prob 0.0 \ + \ + --finetune-from-model ${PT_CHECKPOINT_PATH} +``` + +#### Inference + +Generating speech is available only if batch size is 1. + +``` +SPEECHT5_CODE_DIR= +CHECKPOINT_PATH= +DATA_ROOT= +SUBSET= +LABEL_DIR= +USER_DIR= +RESULTS_PATH= + +python3 ${SPEECHT5_CODE_DIR}/SpeechT5/scripts/generate_speech.py ${DATA_ROOT} \ + --gen-subset test \ + --user-dir ${USER_DIR} \ + --task speecht5 \ + --t5-task s2s \ + --path ${CHECKPOINT_PATH} \ + --hubert-label-dir ${LABEL_DIR} \ + --batch-size 1 \ + --results-path ${RESULTS_PATH} \ + --sample-rate 16000 +``` + +### SID + +The manifest can be found in [huggingface](https://huggingface.co/mechanicalsea/speecht5-sid), which may be helpful to reproduce the results of SpeechT5 SID model. + +We also provide re-implementation of SID fine-tuned model [speecht5_sid.pt](https://huggingface.co/mechanicalsea/speecht5-sid/blob/main/speecht5_sid.pt) with training log and results, **but in a smaller batch size**, which can be helpful. 
+ +#### Training + + +``` +DATA_ROOT= +SAVE_DIR= +TRAIN_SET= +VALID_SET= +USER_DIR= +PT_CHECKPOINT_PATH= + +mkdir -p ${SAVE_DIR} + +fairseq-train ${DATA_ROOT} \ + --save-dir ${SAVE_DIR} \ + --tensorboard-logdir ${SAVE_DIR} \ + --train-subset ${TRAIN_SET} \ + --valid-subset ${VALID_SET} \ + --user-dir ${USER_DIR} \ + --distributed-world-size 8 \ + --distributed-port 0 \ + --ddp-backend legacy_ddp \ + --log-format json \ + --seed 1 \ + --fp16 \ + \ + --task speecht5 \ + --t5-task s2c \ + --sample-rate 16000 \ + --num-workers 4 \ + --batch-size 8 \ + --update-freq 2 \ + --data-buffer-size 0 \ + \ + --criterion speecht5 \ + --report-accuracy \ + --best-checkpoint-metric "s2c_accuracy" \ + --maximize-best-checkpoint-metric \ + \ + --optimizer adam \ + --dropout 0.1 \ + --activation-dropout 0.1 \ + --attention-dropout 0.1 \ + --encoder-layerdrop 0.05 \ + --lr-scheduler triangular \ + --max-lr 2e-4 \ + --lr-period-updates 60000 \ + --lr-shrink 0.5 \ + --lr 1e-8 \ + --feature-grad-mult 1.0 \ + --weight-decay 0.1 \ + \ + --max-update 60000 \ + --max-text-positions 600 \ + --max-speech-positions 8000 \ + --required-batch-size-multiple 1 \ + --skip-invalid-size-inputs-valid-test \ + --save-interval-updates 10000 \ + --validate-after-updates 20000 \ + --no-epoch-checkpoints \ + --log-interval 10 \ + \ + --arch t5_transformer_base_asr \ + --share-input-output-embed \ + --find-unused-parameters \ + --bert-init \ + --relative-position-embedding \ + --mask-prob 0.0 \ + --mask-channel-prob 0.0 \ + --sid-no-pooling-bn \ + --sid-no-embed-postnet \ + \ + --finetune-from-model ${PT_CHECKPOINT_PATH} +``` + +#### Inference + + +``` +CHECKPOINT_PATH= +DATA_ROOT= +SUBSET= +USER_DIR= +RESULTS_PATH= + +mkdir -p ${RESULTS_PATH} + +python scripts/generate_class.py ${DATA_ROOT} \ + --gen-subset ${SUBSET} \ + --user-dir ${USER_DIR} \ + --log-format json \ + --task speecht5 \ + --t5-task s2c \ + --path ${CHECKPOINT_PATH} \ + --results-path ${RESULTS_PATH} \ + --batch-size 1 \ + --max-speech-positions 8000 \ + --sample-rate 16000 +``` + +## License + +This project is licensed under the license found in the LICENSE file in the root directory of this source tree. +Portions of the source code are based on the [FAIRSEQ](https://github.com/pytorch/fairseq) and [ESPnet](https://github.com/espnet/espnet) projects. + +[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct) + +### Reference + +If you find our work is useful in your research, please cite the following paper: + +```bibtex +@article{Ao2021SpeechT5, + title = {SpeechT5: Unified-Modal Encoder-Decoder Pre-training for Spoken Language Processing}, + author = {Junyi Ao and Rui Wang and Long Zhou and Chengyi Wang and Shuo Ren and Yu Wu and Shujie Liu and Tom Ko and Qing Li and Yu Zhang and Zhihua Wei and Yao Qian and Jinyu Li and Furu Wei}, + eprint={2110.07205}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + year={2021} +} +``` + +### Contact Information + +For help or issues using SpeechT5 models, please submit a GitHub issue. + +For other communications related to SpeechT5, please contact Long Zhou (`lozhou@microsoft.com`). 
diff --git a/SpeechT5/SpeechT5/results/ablation_study.png b/SpeechT5/SpeechT5/results/ablation_study.png new file mode 100644 index 0000000000000000000000000000000000000000..9e3fb62a4a0639e97960ab6d68d48a4ee18c734a Binary files /dev/null and b/SpeechT5/SpeechT5/results/ablation_study.png differ diff --git a/SpeechT5/SpeechT5/results/asr.png b/SpeechT5/SpeechT5/results/asr.png new file mode 100644 index 0000000000000000000000000000000000000000..b0250a4dea3415e2bd1cb15a21177c689bb56c48 Binary files /dev/null and b/SpeechT5/SpeechT5/results/asr.png differ diff --git a/SpeechT5/SpeechT5/results/se.png b/SpeechT5/SpeechT5/results/se.png new file mode 100644 index 0000000000000000000000000000000000000000..3c0f55baa190534e8c72fabee1f6911475758a3f Binary files /dev/null and b/SpeechT5/SpeechT5/results/se.png differ diff --git a/SpeechT5/SpeechT5/results/sid.png b/SpeechT5/SpeechT5/results/sid.png new file mode 100644 index 0000000000000000000000000000000000000000..d7e464af982f9ccd4affa58f8db37ba4dec4cf83 Binary files /dev/null and b/SpeechT5/SpeechT5/results/sid.png differ diff --git a/SpeechT5/SpeechT5/results/st.png b/SpeechT5/SpeechT5/results/st.png new file mode 100644 index 0000000000000000000000000000000000000000..711add5776a52b7691db5ac14fabb4453660c07e Binary files /dev/null and b/SpeechT5/SpeechT5/results/st.png differ diff --git a/SpeechT5/SpeechT5/results/tts.png b/SpeechT5/SpeechT5/results/tts.png new file mode 100644 index 0000000000000000000000000000000000000000..6d06326844de2c57437ad5a27467f68fb5d28c12 Binary files /dev/null and b/SpeechT5/SpeechT5/results/tts.png differ diff --git a/SpeechT5/SpeechT5/results/vc.png b/SpeechT5/SpeechT5/results/vc.png new file mode 100644 index 0000000000000000000000000000000000000000..ce9753efe328ef00c7239533f39865680d2d2f34 Binary files /dev/null and b/SpeechT5/SpeechT5/results/vc.png differ diff --git a/SpeechT5/SpeechT5/scripts/generate_class.py b/SpeechT5/SpeechT5/scripts/generate_class.py new file mode 100644 index 0000000000000000000000000000000000000000..d3f5a909523a548cb025d0efc3b8ac7435e7f036 --- /dev/null +++ b/SpeechT5/SpeechT5/scripts/generate_class.py @@ -0,0 +1,153 @@ +import ast +import logging +import os +import sys +from argparse import Namespace + +import numpy as np +import torch +from fairseq import checkpoint_utils, options, tasks, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.logging import progress_bar +from omegaconf import DictConfig + + +def main(cfg: DictConfig): + if isinstance(cfg, Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + assert cfg.common_eval.path is not None, "--path required for generation!" 
+ assert ( + cfg.generation.replace_unk is None or cfg.dataset.dataset_impl == "raw" + ), "--replace-unk requires a raw text dataset (--dataset-impl=raw)" + + if cfg.common_eval.results_path is not None: + os.makedirs(cfg.common_eval.results_path, exist_ok=True) + + return _main(cfg, sys.stdout) + + +def _main(cfg: DictConfig, output_file): + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=output_file, + ) + logger = logging.getLogger("speecht5.generate_class") + + utils.import_user_module(cfg.common) + + assert cfg.dataset.batch_size == 1, "only support batch size 1" + logger.info(cfg) + + # Fix seed for stochastic decoding + if cfg.common.seed is not None and not cfg.generation.no_seed_provided: + np.random.seed(cfg.common.seed) + utils.set_torch_seed(cfg.common.seed) + + use_cuda = torch.cuda.is_available() and not cfg.common.cpu + if not use_cuda: + logger.info("generate speech on cpu") + + # build task + task = tasks.setup_task(cfg.task) + + # Load ensemble + logger.info("loading model(s) from {}".format(cfg.common_eval.path)) + overrides = ast.literal_eval(cfg.common_eval.model_overrides) + models, saved_cfg = checkpoint_utils.load_model_ensemble( + utils.split_paths(cfg.common_eval.path), + arg_overrides=overrides, + task=task, + suffix=cfg.checkpoint.checkpoint_suffix, + strict=(cfg.checkpoint.checkpoint_shard_count == 1), + num_shards=cfg.checkpoint.checkpoint_shard_count, + ) + logger.info(saved_cfg) + + # loading the dataset should happen after the checkpoint has been loaded so we can give it the saved task config + task.load_dataset(cfg.dataset.gen_subset, task_cfg=saved_cfg.task) + + # optimize ensemble for generation + for model in models: + if model is None: + continue + if cfg.common.fp16: + model.half() + if use_cuda and not cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(cfg) + + # load dataset (possibly sharded) + itr = task.get_batch_iterator( + dataset=task.dataset(cfg.dataset.gen_subset), + max_tokens=cfg.dataset.max_tokens, + max_sentences=cfg.dataset.batch_size, + max_positions=None, + ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=cfg.dataset.required_batch_size_multiple, + seed=cfg.common.seed, + num_shards=cfg.distributed_training.distributed_world_size, + shard_id=cfg.distributed_training.distributed_rank, + num_workers=cfg.dataset.num_workers, + data_buffer_size=cfg.dataset.data_buffer_size, + ).next_epoch_itr(shuffle=False) + progress = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_interval=cfg.common.log_interval, + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + ) + + n_correct = 0 + n_total = 0 + assert hasattr(task.dataset(cfg.dataset.gen_subset), "tgt_dict") + dict_class = task.dataset(cfg.dataset.gen_subset).tgt_dict + for i, sample in enumerate(progress): + if "net_input" not in sample or "source" not in sample["net_input"]: + continue + sample = utils.move_to_cuda(sample) if use_cuda else sample + prefix_tokens = utils.move_to_cuda( + torch.LongTensor([[dict_class.eos()] for _ in range(len(sample["net_input"]["source"]))]) + ) + + outs = task.generate_class( + models, + sample["net_input"], + prefix_tokens, + ) + prediction = outs.detach().cpu().tolist() + categories = [dict_class[predi] for predi in prediction] + + if "target" in sample: + target = 
sample["target"].squeeze(1).detach().cpu().tolist() + labels = [dict_class[tgti] for tgti in target] + + n_total += len(categories) + if "target" in sample: + r_correct = [] + for ci, li in zip(categories, labels): + if ci == li: + n_correct += 1 + r_correct.append(True) + else: + r_correct.append(False) + + logger.info( + f"{i} (size: {sample['net_input']['source'].shape}) -> {prediction} ({categories}) " + + f"<- target: {target} ({labels})\t{r_correct}" if "target" in sample else "" + ) + logger.info( + f"Accuracy on {cfg.dataset.gen_subset}: {n_correct*100.0/n_total:.3f} ({n_correct}/{n_total})" + ) + + +def cli_main(): + parser = options.get_generation_parser() + args = options.parse_args_and_arch(parser) + main(args) + + +if __name__ == "__main__": + cli_main() diff --git a/SpeechT5/SpeechT5/scripts/generate_speech.py b/SpeechT5/SpeechT5/scripts/generate_speech.py new file mode 100644 index 0000000000000000000000000000000000000000..deed3e4a552660909e1b4088ab0c90e66206fea4 --- /dev/null +++ b/SpeechT5/SpeechT5/scripts/generate_speech.py @@ -0,0 +1,199 @@ +import ast +import logging +import os +import os.path as op +import sys +from argparse import Namespace + +import numpy as np +import torch +from fairseq import checkpoint_utils, options, tasks, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.logging import progress_bar +from omegaconf import DictConfig + + +# define function for plot prob and att_ws +def _plot_and_save(array, figname, figsize=(6, 4), dpi=150): + import matplotlib.pyplot as plt + + shape = array.shape + if len(shape) == 1: + # for eos probability + plt.figure(figsize=figsize, dpi=dpi) + plt.plot(array) + plt.xlabel("Frame") + plt.ylabel("Probability") + plt.ylim([0, 1]) + elif len(shape) == 2: + # for tacotron 2 attention weights, whose shape is (out_length, in_length) + plt.figure(figsize=figsize, dpi=dpi) + plt.imshow(array, aspect="auto") + elif len(shape) == 4: + # for transformer attention weights, + # whose shape is (#leyers, #heads, out_length, in_length) + plt.figure(figsize=(figsize[0] * shape[0], figsize[1] * shape[1]), dpi=dpi) + for idx1, xs in enumerate(array): + for idx2, x in enumerate(xs, 1): + plt.subplot(shape[0], shape[1], idx1 * shape[1] + idx2) + plt.imshow(x, aspect="auto") + plt.xlabel("Input") + plt.ylabel("Output") + else: + raise NotImplementedError("Support only from 1D to 4D array.") + plt.tight_layout() + if not op.exists(op.dirname(figname)): + # NOTE: exist_ok = True is needed for parallel process decoding + os.makedirs(op.dirname(figname), exist_ok=True) + plt.savefig(figname) + plt.close() + + +# define function to calculate focus rate +# (see section 3.3 in https://arxiv.org/abs/1905.09263) +def _calculate_focus_rete(att_ws): + if att_ws is None: + # fastspeech case -> None + return 1.0 + elif len(att_ws.shape) == 2: + # tacotron 2 case -> (L, T) + return float(att_ws.max(dim=-1)[0].mean()) + elif len(att_ws.shape) == 4: + # transformer case -> (#layers, #heads, L, T) + return float(att_ws.max(dim=-1)[0].mean(dim=-1).max()) + else: + raise ValueError("att_ws should be 2 or 4 dimensional tensor.") + + +def main(cfg: DictConfig): + if isinstance(cfg, Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + assert cfg.common_eval.path is not None, "--path required for generation!" 
+ assert ( + cfg.generation.replace_unk is None or cfg.dataset.dataset_impl == "raw" + ), "--replace-unk requires a raw text dataset (--dataset-impl=raw)" + + if cfg.common_eval.results_path is not None: + os.makedirs(cfg.common_eval.results_path, exist_ok=True) + + return _main(cfg, sys.stdout) + + +def _main(cfg: DictConfig, output_file): + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=output_file, + ) + logger = logging.getLogger("speecht5.generate_speech") + + utils.import_user_module(cfg.common) + + assert cfg.dataset.batch_size == 1, "only support batch size 1" + logger.info(cfg) + + # Fix seed for stochastic decoding + if cfg.common.seed is not None and not cfg.generation.no_seed_provided: + np.random.seed(cfg.common.seed) + utils.set_torch_seed(cfg.common.seed) + + use_cuda = torch.cuda.is_available() and not cfg.common.cpu + if not use_cuda: + logger.info("generate speech on cpu") + + # build task + task = tasks.setup_task(cfg.task) + + # Load ensemble + logger.info("loading model(s) from {}".format(cfg.common_eval.path)) + overrides = ast.literal_eval(cfg.common_eval.model_overrides) + models, saved_cfg = checkpoint_utils.load_model_ensemble( + utils.split_paths(cfg.common_eval.path), + arg_overrides=overrides, + task=task, + suffix=cfg.checkpoint.checkpoint_suffix, + strict=(cfg.checkpoint.checkpoint_shard_count == 1), + num_shards=cfg.checkpoint.checkpoint_shard_count, + ) + logger.info(saved_cfg) + + # loading the dataset should happen after the checkpoint has been loaded so we can give it the saved task config + task.load_dataset(cfg.dataset.gen_subset, task_cfg=saved_cfg.task) + + # optimize ensemble for generation + for model in models: + if model is None: + continue + if cfg.common.fp16: + model.half() + if use_cuda and not cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(cfg) + + # load dataset (possibly sharded) + itr = task.get_batch_iterator( + dataset=task.dataset(cfg.dataset.gen_subset), + max_tokens=cfg.dataset.max_tokens, + max_sentences=cfg.dataset.batch_size, + max_positions=None, + ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=cfg.dataset.required_batch_size_multiple, + seed=cfg.common.seed, + num_shards=cfg.distributed_training.distributed_world_size, + shard_id=cfg.distributed_training.distributed_rank, + num_workers=cfg.dataset.num_workers, + data_buffer_size=cfg.dataset.data_buffer_size, + ).next_epoch_itr(shuffle=False) + progress = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_interval=cfg.common.log_interval, + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + ) + + for i, sample in enumerate(progress): + if "net_input" not in sample: + continue + sample = utils.move_to_cuda(sample) if use_cuda else sample + outs, _, attn = task.generate_speech( + models, + sample["net_input"], + ) + focus_rate = _calculate_focus_rete(attn) + outs = outs.cpu().numpy() + audio_name = op.basename(sample['name'][0]) + np.save(op.join(cfg.common_eval.results_path, audio_name.replace(".wav", "-feats.npy")), outs) + + logging.info( + "{} (size: {}->{} ({}), focus rate: {:.3f})".format( + sample['name'][0], + sample['src_lengths'][0].item(), + outs.shape[0], + sample['dec_target_lengths'][0].item(), + focus_rate + ) + ) + + if i < 6 and attn is not None: + import shutil + demo_dir = 
op.join(op.dirname(cfg.common_eval.results_path), "demo") + audio_dir = op.join(demo_dir, "audio") + os.makedirs(audio_dir, exist_ok=True) + shutil.copy(op.join(task.dataset(cfg.dataset.gen_subset).audio_root, sample['tgt_name'][0] if "tgt_name" in sample else sample['name'][0]), op.join(audio_dir, audio_name)) + att_dir = op.join(demo_dir, "att_ws") + _plot_and_save(attn.cpu().numpy(), op.join(att_dir, f"{audio_name}_att_ws.png")) + spec_dir = op.join(demo_dir, "spec") + _plot_and_save(outs.T, op.join(spec_dir, f"{audio_name}_gen.png")) + _plot_and_save(sample["target"][0].cpu().numpy().T, op.join(spec_dir, f"{audio_name}_ori.png")) + + +def cli_main(): + parser = options.get_generation_parser() + args = options.parse_args_and_arch(parser) + main(args) + + +if __name__ == "__main__": + cli_main() diff --git a/SpeechT5/SpeechT5/speecht5/__init__.py b/SpeechT5/SpeechT5/speecht5/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8994f9a368ae4b2eff720fffb134e2a5b813ee1c --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/__init__.py @@ -0,0 +1 @@ +from . import data, tasks, criterions, models # noqa \ No newline at end of file diff --git a/SpeechT5/SpeechT5/speecht5/criterions/__init__.py b/SpeechT5/SpeechT5/speecht5/criterions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f4aa07ca8bf24092095fc9303ee4a1d80cec7b35 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/criterions/__init__.py @@ -0,0 +1,10 @@ +import importlib +import os + + +for file in os.listdir(os.path.dirname(__file__)): + if file.endswith(".py") and not file.startswith("_"): + criterion_name = file[: file.find(".py")] + importlib.import_module( + "speecht5.criterions." + criterion_name + ) \ No newline at end of file diff --git a/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/__init__.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56e5020c6254a9b9b1102491659d3bfcb37f6483 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/__init__.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/speech_pretrain_criterion.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/speech_pretrain_criterion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be69e3aa7481e6524a65a2500ca9b89fd8a0c8c2 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/speech_pretrain_criterion.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/speech_to_text_loss.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/speech_to_text_loss.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a0a9a01ea49f80475336d94b13dfc2206b01a201 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/speech_to_text_loss.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/speecht5_criterion.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/speecht5_criterion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e0203be84d2bf6eb2c7d972356b30775ddec376 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/speecht5_criterion.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/text_pretrain_criterion.cpython-38.pyc 
b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/text_pretrain_criterion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a72cd8e87496bd7f3fbc91f19c75eab96a24ffd7 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/text_pretrain_criterion.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/text_to_speech_loss.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/text_to_speech_loss.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9e9044096241eb432c34c1bfc18f62a26ec95a4 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/criterions/__pycache__/text_to_speech_loss.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/criterions/speech_pretrain_criterion.py b/SpeechT5/SpeechT5/speecht5/criterions/speech_pretrain_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..8312a50c7556b9ada6b738709c134459cdc6cb86 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/criterions/speech_pretrain_criterion.py @@ -0,0 +1,267 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import math +import re +from dataclasses import dataclass, field +from typing import List, Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion +from speecht5.criterions.text_to_speech_loss import TexttoSpeechLoss, TexttoSpeechLossConfig + + +@dataclass +class SpeechPretrainCriterionConfig(TexttoSpeechLossConfig): + pred_masked_weight: float = field( + default=1.0, + metadata={"help": "weight for predictive loss for masked frames"}, + ) + pred_nomask_weight: float = field( + default=0.0, + metadata={"help": "weight for predictive loss for unmasked frames"}, + ) + loss_weights: Optional[List[float]] = field( + default_factory=lambda: [10,], + metadata={"help": "weights for additional loss terms (not first one)"}, + ) + log_keys: List[str] = field( + default_factory=lambda: [], + metadata={"help": "output keys to log"}, + ) + hubert_weight: float = field( + default=1.0, + metadata={"help": "weight of hubert loss"}, + ) + dec_weight: float = field( + default=1.0, + metadata={"help": "weight of decoder loss"}, + ) + + +class SpeechPretrainCriterion(FairseqCriterion): + def __init__( + self, + task, + sentence_avg, + pred_masked_weight, + pred_nomask_weight, + loss_weights=None, + log_keys=None, + use_masking=True, + use_weighted_masking=False, + loss_type="L1", + bce_pos_weight=5.0, + hubert_weight=1.0, + dec_weight=1.0, + ): + super().__init__(task) + self.pred_masked_weight = pred_masked_weight + self.pred_nomask_weight = pred_nomask_weight + self.loss_weights = loss_weights + self.log_keys = [] if log_keys is None else log_keys + self.hubert_weight = hubert_weight + self.dec_weight = dec_weight + + self.speech_criterion = TexttoSpeechLoss( + task, + sentence_avg, + use_masking, + use_weighted_masking, + loss_type, + bce_pos_weight, + ) + + def forward(self, model, sample, reduce=True, log_pred=False): 
+ """Compute the loss for the given sample. + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + if self.dec_weight == 0: + sample["net_input"]["only_hubert"] = True + net_output, net_output_dec = model(target_list=sample["target_list"], **sample["net_input"]) + loss = 0. + sample_size = 0 + logging_output = {} + reduction = "sum" if reduce else "none" + + loss_m_list = [] + logp_m_list = model.get_logits(net_output, True) + targ_m_list = model.get_targets(None, net_output, True) + assert self.pred_masked_weight == 0 or len(logp_m_list) > 0 + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"loss_m_{i}"] = loss_m.detach().item() + if self.pred_masked_weight > 0: + loss += self.pred_masked_weight * sum(loss_m_list) + sample_size += targ_m_list[0].numel() + + loss_u_list = [] + logp_u_list = model.get_logits(net_output, False) + targ_u_list = model.get_targets(None, net_output, False) + assert self.pred_nomask_weight == 0 or len(logp_u_list) > 0 + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"loss_u_{i}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss += self.pred_nomask_weight * sum(loss_u_list) + sample_size += targ_u_list[0].numel() + + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + if len(self.loss_weights) > len(extra_losses): + modified_loss_weight = self.loss_weights[:len(extra_losses)] + else: + modified_loss_weight = self.loss_weights + + # assert len(extra_losses) == len(self.loss_weights), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, modified_loss_weight): + # print(n + str(coef)) + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss += p + logging_output[f"loss_{n}"] = p.detach().item() + + logging_output = { + "ntokens": sample_size, + "nsentences": sample["id"].numel(), + "sample_size": sample_size, + "ngpu": 1, + **logging_output, + } + + if 'loss_prob_perplexity' in logging_output: + logging_output['code_perplexity'] = net_output['code_perplexity'].detach().item() + + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk].item())) + + def compute_correct(logits): + if logits.numel() == 0: + return 0, 0 + else: + assert logits.dim() > 1, logits.shape + max = logits.argmax(-1) == 0 + min = logits.argmin(-1) == 0 + both = max & min + corr = max.long().sum().item() - both.long().sum().item() + count = max.numel() + return corr, count + + with torch.no_grad(): + for i, logp_m in enumerate(logp_m_list): + corr_m, count_m = compute_correct(logp_m) + logging_output[f"correct_m_{i}"] = corr_m + logging_output[f"count_m_{i}"] = count_m + + for i, logp_u in enumerate(logp_u_list): + corr_u, count_u = compute_correct(logp_u) + logging_output[f"correct_u_{i}"] = corr_u + logging_output[f"count_u_{i}"] = count_u + + if self.dec_weight == 0.0: + logging_output["loss"] = 
loss.item() if reduce else loss + return loss, sample_size, logging_output + +# ## dec loss + dec_loss, l1_loss, l2_loss, bce_loss, enc_dec_attn_loss = self.speech_criterion.compute_loss(model, net_output_dec, sample) + + # Log tts loss + logging_output['dec_loss'] = dec_loss.item() + logging_output['l1_loss'] = l1_loss.item() + logging_output['l2_loss'] = l2_loss.item() + logging_output['bce_loss'] = bce_loss.item() + if enc_dec_attn_loss is not None: + logging_output['enc_dec_attn_loss'] = enc_dec_attn_loss.item() + + loss = self.hubert_weight * loss + self.dec_weight * sample_size * dec_loss + logging_output["loss"] = loss.item() if reduce else loss + return loss, sample_size, logging_output + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training (copied from normal cross entropy).""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + dec_loss_sum = sum(log.get("dec_loss", 0) for log in logging_outputs) + l1_loss_sum = sum(log.get("l1_loss", 0) for log in logging_outputs) + l2_loss_sum = sum(log.get("l2_loss", 0) for log in logging_outputs) + bce_loss_sum = sum(log.get("bce_loss", 0) for log in logging_outputs) + ngpu = sum(log.get("ngpu", 0) for log in logging_outputs) + + metrics.log_scalar("loss", loss_sum / sample_size / math.log(2), sample_size, round=3) + if sample_size != ntokens: + metrics.log_scalar("nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3) + metrics.log_derived("ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)) + else: + metrics.log_derived("ppl", lambda meters: utils.get_perplexity(meters["loss"].avg)) + + counts = {} + for lk in logging_outputs[0].keys(): + if lk.startswith("count_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val) + counts[lk] = val + + for lk in logging_outputs[0].keys(): + if lk.startswith("loss_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val / sample_size / math.log(2), round=3) + elif lk.startswith("correct_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val / counts[re.sub("correct", "count", lk)]) + elif lk == 'code_perplexity': + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val / len(logging_outputs), round=3) + + metrics.log_scalar( + "dec_loss", dec_loss_sum / ngpu, sample_size, 2, round=5 + ) + metrics.log_scalar( + "l1_loss", l1_loss_sum / ngpu, sample_size, 2, round=5 + ) + metrics.log_scalar( + "l2_loss", l2_loss_sum / ngpu, sample_size, 2, round=5 + ) + metrics.log_scalar( + "bce_loss", bce_loss_sum / ngpu, sample_size, 2, round=5 + ) + if "enc_dec_attn_loss" in logging_outputs[0]: + enc_dec_attn_loss_sum = sum(log.get("enc_dec_attn_loss", 0) for log in logging_outputs) + metrics.log_scalar( + "enc_dec_attn_loss", enc_dec_attn_loss_sum / ngpu, sample_size, round=8 + ) + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + raise NotImplementedError() + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. 
+ """ + return False diff --git a/SpeechT5/SpeechT5/speecht5/criterions/speech_to_text_loss.py b/SpeechT5/SpeechT5/speecht5/criterions/speech_to_text_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..29ca90e12093b3a06bb9ee1d364afa3ffcbab19c --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/criterions/speech_to_text_loss.py @@ -0,0 +1,475 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import math +from argparse import Namespace +from dataclasses import dataclass, field +from omegaconf import II +from typing import Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.dataclass import FairseqDataclass +from fairseq.data.data_utils import post_process +from fairseq.tasks import FairseqTask +from fairseq.logging.meters import safe_round + +import logging +logger = logging.getLogger(__name__) + +@dataclass +class SpeechtoTextLossConfig(FairseqDataclass): + zero_infinity: bool = field( + default=False, + metadata={"help": "zero inf loss when source length <= target length"}, + ) + sentence_avg: bool = II("optimization.sentence_avg") + post_process: Optional[str] = field( + default="sentencepiece", + metadata={ + "help": "how to post process predictions into words. can be letter, " + "wordpiece, BPE symbols, etc. 
" + "See fairseq.data.data_utils.post_process() for full list of options" + }, + ) + wer_kenlm_model: Optional[str] = field( + default=None, + metadata={ + "help": "if this is provided, use kenlm to compute wer (along with other wer_* args)" + }, + ) + wer_lexicon: Optional[str] = field( + default=None, + metadata={"help": "lexicon to use with wer_kenlm_model"}, + ) + wer_lm_weight: float = field( + default=2.0, + metadata={"help": "lm weight to use with wer_kenlm_model"}, + ) + wer_word_score: float = field( + default=-1.0, + metadata={"help": "lm word score to use with wer_kenlm_model"}, + ) + + wer_args: Optional[str] = field( + default=None, + metadata={ + "help": "DEPRECATED: tuple of (wer_kenlm_model, wer_lexicon, wer_lm_weight, wer_word_score)" + }, + ) + + label_smoothing: float = field( + default=0.0, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + report_accuracy: bool = field( + default=False, + metadata={"help": "report accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + #: bool = II("optimization.sentence_avg") + + ce_weight: float = field( + default=1.0, + metadata={"help": "loss weight for cross entropy"}, + ) + ctc_weight: float = field( + default=0.0, + metadata={"help": "loss weiehgt for ctc in ASR"}, + ) + + +def label_smoothed_nll_loss(lprobs, target, epsilon, ignore_index=None, reduce=True): + if target.dim() == lprobs.dim() - 1: + target = target.unsqueeze(-1) + nll_loss = -lprobs.gather(dim=-1, index=target) + smooth_loss = -lprobs.sum(dim=-1, keepdim=True) + if ignore_index is not None: + pad_mask = target.eq(ignore_index) + nll_loss.masked_fill_(pad_mask, 0.0) + smooth_loss.masked_fill_(pad_mask, 0.0) + else: + nll_loss = nll_loss.squeeze(-1) + smooth_loss = smooth_loss.squeeze(-1) + if reduce: + nll_loss = nll_loss.sum() + smooth_loss = smooth_loss.sum() + eps_i = epsilon / (lprobs.size(-1) - 1) + loss = (1.0 - epsilon - eps_i) * nll_loss + eps_i * smooth_loss + return loss, nll_loss + + +class SpeechtoTextLoss(FairseqCriterion): + def __init__( + self, + cfg: SpeechtoTextLossConfig, + task: FairseqTask, + sentence_avg=True, + label_smoothing=0.1, + ignore_prefix_size=0, + report_accuracy=False, + ce_weight=1.0, + ctc_weight=0.0, + ): + + super().__init__(task) + self.blank_idx = ( + task.target_dictionary.index(task.blank_symbol) + if hasattr(task, "blank_symbol") + else 0 + ) + #print ("self.blank_idx: ", self.blank_idx) + + self.pad_idx = task.target_dictionary.pad() + self.eos_idx = task.target_dictionary.eos() + self.post_process = cfg.post_process + self.ce_weight = ce_weight + self.ctc_weight = ctc_weight + + ## for ce + self.sentence_avg = sentence_avg + self.eps = label_smoothing + self.ignore_prefix_size = ignore_prefix_size + self.report_accuracy = report_accuracy + + if cfg.wer_args is not None: + ( + cfg.wer_kenlm_model, + cfg.wer_lexicon, + cfg.wer_lm_weight, + cfg.wer_word_score, + ) = eval(cfg.wer_args) + + if cfg.wer_kenlm_model is not None: + from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder + + dec_args = Namespace() + dec_args.nbest = 1 + dec_args.criterion = "ctc" + dec_args.kenlm_model = cfg.wer_kenlm_model + dec_args.lexicon = cfg.wer_lexicon + dec_args.beam = 50 + dec_args.beam_size_token = min(50, len(task.target_dictionary)) + dec_args.beam_threshold = min(50, len(task.target_dictionary)) + dec_args.lm_weight = cfg.wer_lm_weight + dec_args.word_score = cfg.wer_word_score + dec_args.unk_weight = -math.inf + 
dec_args.sil_weight = 0 + + self.w2l_decoder = W2lKenLMDecoder(dec_args, task.target_dictionary) + else: + self.w2l_decoder = None + + self.zero_infinity = cfg.zero_infinity + #self.sentence_avg = cfg.sentence_avg + + if self.ce_weight > 0 and self.ctc_weight > 0: + logger.info("Using cross entropy loss and CTC loss for ASR") + elif self.ce_weight > 0: + logger.info("Only using CE loss") + elif self.ctc_weight > 0: + logger.info("Only using CTC loss for ASR") + else: + logger.info("ERROR") + + def forward(self, model, sample, reduce=True): + + if self.ce_weight == 0 and self.ctc_weight > 0: + sample["only_ctc"] = True + + net_output_decoder, net_output = model(**sample["net_input"]) + + if self.ce_weight > 0: + loss_ce, nll_loss_ce = self.compute_loss(model, net_output_decoder, sample, reduce=reduce) + #print ("loss_ce: ", loss_ce) + else: + nll_loss_ce = None + + if self.ctc_weight > 0: + loss_ctc, lprobs, input_lengths = self.compute_loss_ctc(model, net_output, sample) + + if self.ce_weight > 0 and self.ctc_weight > 0: + loss = self.ce_weight * loss_ce + self.ctc_weight * loss_ctc + elif self.ce_weight > 0: + loss = loss_ce + elif self.ctc_weight > 0: + loss = loss_ctc + else: + logger.info("ERROR: must ce_weight > 0 or ctc_weight > 0") + + ntokens = ( + sample["ntokens"] if "ntokens" in sample else sample["target_lengths"].sum().item() + ) + + sample_size = sample["target"].size(0) if self.sentence_avg else ntokens + + logging_output = { + "loss": loss.item(), + "ce_loss": loss_ce.item() if self.ce_weight > 0 else 0, + "ctc_loss": loss_ctc.item() if self.ctc_weight > 0 else 0, + "nll_loss": nll_loss_ce.item() if nll_loss_ce is not None else 0, + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + "sample_size": sample_size, + } + + if self.ce_weight > 0 and self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output_decoder, sample) + logging_output["n_correct"] = utils.item(n_correct.item()) + logging_output["total"] = utils.item(total.data) + + if self.ctc_weight > 0 and not model.training: + import editdistance + + with torch.no_grad(): + lprobs_t = lprobs.transpose(0, 1).float().contiguous().cpu() + + c_err = 0 + c_len = 0 + w_errs = 0 + w_len = 0 + wv_errs = 0 + for lp, t, inp_l in zip( + lprobs_t, + sample["target_label"] + if "target_label" in sample + else sample["target"], + input_lengths, + ): + lp = lp[:inp_l].unsqueeze(0) + + decoded = None + if self.w2l_decoder is not None: + decoded = self.w2l_decoder.decode(lp) + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + + p = (t != self.task.target_dictionary.pad()) & ( + t != self.task.target_dictionary.eos() + ) + targ = t[p] + targ_units = self.task.target_dictionary.string(targ) + targ_units_arr = targ.tolist() + + toks = lp.argmax(dim=-1).unique_consecutive() + pred_units_arr = toks[toks != self.blank_idx].tolist() + + c_err += editdistance.eval(pred_units_arr, targ_units_arr) + c_len += len(targ_units_arr) + + targ_words = post_process(targ_units, self.post_process).split() + + pred_units = self.task.target_dictionary.string(pred_units_arr) + pred_words_raw = post_process(pred_units, self.post_process).split() + + if decoded is not None and "words" in decoded: + pred_words = decoded["words"] + w_errs += editdistance.eval(pred_words, targ_words) + wv_errs += editdistance.eval(pred_words_raw, targ_words) + else: + dist = editdistance.eval(pred_words_raw, targ_words) + w_errs += dist + 
wv_errs += dist + + w_len += len(targ_words) + + logging_output["wv_errors"] = wv_errs + logging_output["w_errors"] = w_errs + logging_output["w_total"] = w_len + logging_output["c_errors"] = c_err + logging_output["c_total"] = c_len + + return loss, sample_size, logging_output + + def compute_loss_ctc(self, model, net_output, sample): + lprobs = model.get_normalized_probs_for_ctc( + net_output, log_probs=True + ).contiguous() # (T, B, C) from the encoder + + if net_output["encoder_padding_mask"] is not None: + non_padding_mask = ~net_output["encoder_padding_mask"][0] + input_lengths = non_padding_mask.long().sum(-1) + else: + input_lengths = lprobs.new_full( + (lprobs.size(1),), lprobs.size(0), dtype=torch.long + ) + + pad_mask = (sample["target"] != self.pad_idx) & ( + sample["target"] != self.eos_idx + ) + targets_flat = sample["target"].masked_select(pad_mask) + if "target_lengths" in sample: + target_lengths = sample["target_lengths"] + else: + target_lengths = pad_mask.sum(-1) + + ##processing + target_lengths = target_lengths - 1 + + with torch.backends.cudnn.flags(enabled=False): + loss_ctc = F.ctc_loss( + lprobs, + targets_flat, + input_lengths, + target_lengths, + blank=self.blank_idx, + reduction="sum", + zero_infinity=self.zero_infinity, + ) + + return loss_ctc, lprobs, input_lengths + + ## for ce + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = model.get_targets(sample, net_output) + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + def compute_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.padding_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.padding_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + nll_loss_sum = sum(log.get("nll_loss", 0) for log in logging_outputs) + ce_loss_sum = sum(log.get("ce_loss", 0) for log in logging_outputs) + ctc_loss_sum = sum(log.get("ctc_loss", 0) for log in logging_outputs) + ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs)) + nsentences = utils.item( + sum(log.get("nsentences", 0) for log in logging_outputs) + ) + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + + metrics.log_scalar( + "ctc_loss", ctc_loss_sum / sample_size / math.log(2), ntokens, 2, round=3 + ) + metrics.log_scalar( + "ce_loss", ce_loss_sum / ntokens, ntokens, 2, round=3 + ) + metrics.log_scalar( + "nll_loss", nll_loss_sum / ntokens / 
math.log(2), ntokens, 2, round=3 + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg, 2) + ) + + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("n_correct", n_correct) + metrics.log_derived( + "accuracy", + lambda meters: round( + meters["n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + 2 + ) + + metrics.log_scalar("ntokens", ntokens) + metrics.log_scalar("nsentences", nsentences) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + + c_errors = sum(log.get("c_errors", 0) for log in logging_outputs) + metrics.log_scalar("_c_errors", c_errors) + c_total = sum(log.get("c_total", 0) for log in logging_outputs) + metrics.log_scalar("_c_total", c_total) + w_errors = sum(log.get("w_errors", 0) for log in logging_outputs) + metrics.log_scalar("_w_errors", w_errors) + wv_errors = sum(log.get("wv_errors", 0) for log in logging_outputs) + metrics.log_scalar("_wv_errors", wv_errors) + w_total = sum(log.get("w_total", 0) for log in logging_outputs) + metrics.log_scalar("_w_total", w_total) + + if c_total > 0: + metrics.log_derived( + "uer", + lambda meters: safe_round( + meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3 + ) + if meters["_c_total"].sum > 0 + else float("nan"), + ) + if w_total > 0: + metrics.log_derived( + "wer", + lambda meters: safe_round( + meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + metrics.log_derived( + "raw_wer", + lambda meters: safe_round( + meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. 
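When no external KenLM decoder is configured, the `uer`/`wer` meters derived above boil down to greedy CTC decoding followed by an edit-distance comparison: take the per-frame argmax, collapse consecutive repeats, drop the blank symbol, and count edits against the reference. A toy, self-contained version of that loop with random posteriors and a made-up reference:

```python
import torch
import editdistance

blank_idx = 0
# (T, V) frame-level log-probabilities for one utterance (random, illustrative only).
lp = torch.randn(50, 30).log_softmax(dim=-1)

toks = lp.argmax(dim=-1).unique_consecutive()      # greedy path, repeats collapsed
pred_units = toks[toks != blank_idx].tolist()      # drop CTC blanks

target_units = [5, 7, 7, 12, 3]                    # made-up reference unit ids
c_err = editdistance.eval(pred_units, target_units)
print(f"unit error rate: {100.0 * c_err / len(target_units):.1f}%")
```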
+ """ + return True diff --git a/SpeechT5/SpeechT5/speecht5/criterions/speecht5_criterion.py b/SpeechT5/SpeechT5/speecht5/criterions/speecht5_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..902c1a6dad87103ec1a01df99f88330359b704ff --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/criterions/speecht5_criterion.py @@ -0,0 +1,445 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import re +from dataclasses import dataclass + +import math +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from speecht5.criterions.text_to_speech_loss import TexttoSpeechLoss +from speecht5.criterions.text_pretrain_criterion import TextPretrainCriterion, TextPretrainCriterionConfig +from fairseq.criterions.label_smoothed_cross_entropy import LabelSmoothedCrossEntropyCriterionConfig +from speecht5.criterions.speech_pretrain_criterion import SpeechPretrainCriterion, SpeechPretrainCriterionConfig +from speecht5.criterions.speech_to_text_loss import SpeechtoTextLoss, SpeechtoTextLossConfig +from fairseq.logging.meters import safe_round + +@dataclass +class SpeechT5CriterionConfig( + LabelSmoothedCrossEntropyCriterionConfig, + TextPretrainCriterionConfig, + SpeechPretrainCriterionConfig, + SpeechtoTextLossConfig + ): + pass + +@register_criterion( + "speecht5", dataclass=SpeechT5CriterionConfig +) +class SpeechT5Criterion(FairseqCriterion): + def __init__( + self, + task, + sentence_avg, + label_smoothing, + pred_masked_weight, + pred_nomask_weight, + loss_weights=None, + log_keys=None, + ignore_prefix_size=0, + report_accuracy=False, + use_masking=True, + use_weighted_masking=False, + loss_type="L1", + bce_pos_weight=5.0, + bce_loss_lambda=1.0, + use_guided_attn_loss=False, + num_heads_applied_guided_attn=2, + ce_weight=1.0, + ctc_weight=0.0, + hubert_weight=1.0, + dec_weight=1.0, + bart_weight=1.0, + ): + super().__init__(task) + self.speech_criterion = TexttoSpeechLoss( + task, + sentence_avg, + use_masking, + use_weighted_masking, + loss_type, + bce_pos_weight, + bce_loss_lambda, + use_guided_attn_loss, + num_heads_applied_guided_attn=num_heads_applied_guided_attn, + ) + self.text_criterion = SpeechtoTextLoss( + SpeechtoTextLossConfig, + task, + sentence_avg, + label_smoothing, + ignore_prefix_size, + report_accuracy, + ce_weight, + ctc_weight + ) + self.text_pretrain_criterion = TextPretrainCriterion( + task, + sentence_avg, + bart_weight, + loss_weights, + ) + self.speech_pretrain_criterion = SpeechPretrainCriterion( + task, + sentence_avg, + pred_masked_weight, + pred_nomask_weight, + loss_weights, + log_keys, + use_masking, + use_weighted_masking, + loss_type, + bce_pos_weight, + hubert_weight, + dec_weight + ) + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. 
+ + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + + task_name = sample['task_name'] + if task_name == 's2t' or task_name == 's2c': + return self.text_criterion(model, sample, reduce) + elif task_name == 't2s' or task_name == 's2s': + return self.speech_criterion(model, sample) + elif task_name == 'text_pretrain': + return self.text_pretrain_criterion(model, sample, reduce) + elif task_name == 'speech_pretrain': + return self.speech_pretrain_criterion(model, sample, reduce) + + @classmethod + def reduce_metrics(cls, logging_outputs): + """Aggregate logging outputs from data parallel training.""" + logging_outputs_dict = {} + for logging_output in logging_outputs: + for task_name in logging_output: + if task_name not in ['s2t', 't2s', 's2c', 's2s', 'text_pretrain', 'speech_pretrain']: + continue + + if task_name not in logging_outputs_dict: + logging_outputs_dict[task_name] = [] + logging_outputs_dict[task_name].append(logging_output[task_name]) + + for task_name in logging_outputs_dict: + if task_name == 's2t': + # LabelSmoothedCrossEntropyCriterion.reduce_metrics([logging_output['s2t'] for logging_output in logging_outputs]) + s2t_logging_output = logging_outputs_dict[task_name] + # s2t_sum = sum(log.get("ce_loss", 0) for log in logging_outputs) + loss_sum = sum(log.get("loss", 0) for log in s2t_logging_output) + nll_loss_sum = sum(log.get("nll_loss", 0) for log in s2t_logging_output) + ntokens = sum(log.get("ntokens", 0) for log in s2t_logging_output) + ce_loss_sum = sum(log.get("ce_loss", 0) for log in s2t_logging_output) + ctc_loss_sum = sum(log.get("ctc_loss", 0) for log in s2t_logging_output) + + sample_size = max(1, sum(log.get("sample_size", 0) for log in s2t_logging_output)) + metrics.log_scalar( + "s2t_loss", loss_sum / sample_size / math.log(2), sample_size, 1, round=3 + ) + + metrics.log_scalar( + "s2t_nll_loss", nll_loss_sum / ntokens / math.log(2), ntokens, 2, round=3 + ) + metrics.log_derived( + "s2t_ppl", lambda meters: utils.get_perplexity(meters["s2t_nll_loss"].avg, 2) + ) + metrics.log_scalar( + "ctc_loss", ctc_loss_sum / sample_size / math.log(2), ntokens, 2, round=3 + ) + metrics.log_scalar( + "ce_loss", ce_loss_sum / ntokens, ntokens, 2, round=3 + ) + + total = utils.item(sum(log.get("total", 0) for log in s2t_logging_output)) + if total > 0: + metrics.log_scalar("s2t_total", total) + n_correct = utils.item( + sum(log.get("n_correct", 0) for log in s2t_logging_output) + ) + metrics.log_scalar("s2t_n_correct", n_correct) + metrics.log_derived( + "s2t_accuracy", + lambda meters: round( + meters["s2t_n_correct"].sum * 100.0 / meters["s2t_total"].sum, 3 + ) + if meters["s2t_total"].sum > 0 + else float("nan"), + 2 + ) + c_errors = sum(log.get("c_errors", 0) for log in s2t_logging_output) + metrics.log_scalar("_c_errors", c_errors) + c_total = sum(log.get("c_total", 0) for log in s2t_logging_output) + metrics.log_scalar("_c_total", c_total) + w_errors = sum(log.get("w_errors", 0) for log in s2t_logging_output) + metrics.log_scalar("_w_errors", w_errors) + wv_errors = sum(log.get("wv_errors", 0) for log in s2t_logging_output) + metrics.log_scalar("_wv_errors", wv_errors) + w_total = sum(log.get("w_total", 0) for log in s2t_logging_output) + metrics.log_scalar("_w_total", w_total) + if c_total > 0: + metrics.log_derived( + "uer", + lambda meters: safe_round( + meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3 + ) + if 
meters["_c_total"].sum > 0 + else float("nan"), + ) + if w_total > 0: + metrics.log_derived( + "wer", + lambda meters: safe_round( + meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + metrics.log_derived( + "raw_wer", + lambda meters: safe_round( + meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + + if task_name == 't2s': + # TTSLossCriterion.reduce_metrics([logging_output['t2s'] for logging_output in logging_outputs]) + # t2s_sum = sum(log.get("speech_loss", 0) for log in logging_outputs) + t2s_logging_output = logging_outputs_dict[task_name] + loss_sum = sum(log.get("loss", 0) for log in t2s_logging_output) + l1_loss_sum = sum(log.get("l1_loss", 0) for log in t2s_logging_output) + l2_loss_sum = sum(log.get("l2_loss", 0) for log in t2s_logging_output) + bce_loss_sum = sum(log.get("bce_loss", 0) for log in t2s_logging_output) + sample_size = max(1, sum(log.get("sample_size", 0) for log in t2s_logging_output)) + metrics.log_scalar( + "t2s_loss", loss_sum / sample_size, sample_size, 1, round=5 + ) + encoder_alpha_sum = sum(log.get("encoder_alpha", 0) for log in t2s_logging_output) + decoder_alpha_sum = sum(log.get("decoder_alpha", 0) for log in t2s_logging_output) + ngpu = sum(log.get("ngpu", 0) for log in t2s_logging_output) + + metrics.log_scalar( + "t2s_l1_loss", l1_loss_sum / sample_size, sample_size, 2, round=5 + ) + metrics.log_scalar( + "t2s_l2_loss", l2_loss_sum / sample_size, sample_size, 2, round=5 + ) + metrics.log_scalar( + "t2s_bce_loss", bce_loss_sum / sample_size, sample_size, 2, round=5 + ) + metrics.log_scalar( + "t2s_encoder_alpha", encoder_alpha_sum / sample_size, sample_size, round=5 + ) + metrics.log_scalar( + "t2s_decoder_alpha", decoder_alpha_sum / sample_size, sample_size, round=5 + ) + + if "enc_dec_attn_loss" in t2s_logging_output[0]: + enc_dec_attn_loss_sum = sum(log.get("enc_dec_attn_loss", 0) for log in t2s_logging_output) + metrics.log_scalar( + "t2s_enc_dec_attn_loss", enc_dec_attn_loss_sum / sample_size, sample_size, round=8 + ) + + if task_name == 's2c': + s2c_logging_output = logging_outputs_dict[task_name] + loss_sum = sum(log.get("loss", 0) for log in s2c_logging_output) + nll_loss_sum = sum(log.get("nll_loss", 0) for log in s2c_logging_output) + ntokens = sum(log.get("ntokens", 0) for log in s2c_logging_output) + + sample_size = max(1, sum(log.get("sample_size", 0) for log in s2c_logging_output)) + metrics.log_scalar( + "s2c_loss", loss_sum / sample_size / math.log(2), sample_size, 1, round=3 + ) + + metrics.log_scalar( + "s2c_nll_loss", nll_loss_sum / ntokens / math.log(2), ntokens, 2, round=3 + ) + + total = utils.item(sum(log.get("total", 0) for log in s2c_logging_output)) + if total > 0: + metrics.log_scalar("s2c_total", total) + n_correct = utils.item(sum(log.get("n_correct", 0) for log in s2c_logging_output)) + metrics.log_scalar("s2c_n_correct", n_correct) + metrics.log_derived( + "s2c_accuracy", + lambda meters: round( + meters["s2c_n_correct"].sum * 100.0 / meters["s2c_total"].sum, 3 + ) + if meters["s2c_total"].sum > 0 + else float("nan"), + 2 + ) + + if task_name == 's2s': + s2s_logging_output = logging_outputs_dict[task_name] + loss_sum = sum(log.get("loss", 0) for log in s2s_logging_output) + l1_loss_sum = sum(log.get("l1_loss", 0) for log in s2s_logging_output) + l2_loss_sum = sum(log.get("l2_loss", 0) for log in s2s_logging_output) + bce_loss_sum = sum(log.get("bce_loss", 0) for log in 
s2s_logging_output) + sample_size = max(1, sum(log.get("sample_size", 0) for log in s2s_logging_output)) + metrics.log_scalar( + "s2s_loss", loss_sum / sample_size, sample_size, 1, round=5 + ) + encoder_alpha_sum = sum(log.get("encoder_alpha", 0) for log in s2s_logging_output) + decoder_alpha_sum = sum(log.get("decoder_alpha", 0) for log in s2s_logging_output) + ngpu = sum(log.get("ngpu", 0) for log in s2s_logging_output) + + metrics.log_scalar( + "s2s_l1_loss", l1_loss_sum / sample_size, sample_size, 2, round=5 + ) + metrics.log_scalar( + "s2s_l2_loss", l2_loss_sum / sample_size, sample_size, 2, round=5 + ) + metrics.log_scalar( + "s2s_bce_loss", bce_loss_sum / sample_size, sample_size, 2, round=5 + ) + metrics.log_scalar( + "s2s_decoder_alpha", decoder_alpha_sum / sample_size, sample_size, round=5 + ) + + if "enc_dec_attn_loss" in s2s_logging_output[0]: + enc_dec_attn_loss_sum = sum(log.get("enc_dec_attn_loss", 0) for log in s2s_logging_output) + metrics.log_scalar( + "s2s_enc_dec_attn_loss", enc_dec_attn_loss_sum / sample_size, sample_size, round=8 + ) + + if task_name == 'text_pretrain': + bart_logging_output = logging_outputs_dict[task_name] + loss_sum = sum(log.get("loss", 0) for log in bart_logging_output) + ntokens = sum(log.get("ntokens", 0) for log in bart_logging_output) + sample_size = max(1, sum(log.get("sample_size", 0) for log in bart_logging_output)) + bart_loss_sum = sum(log.get("bart_loss", 0) for log in bart_logging_output) + + # we divide by log(2) to convert the loss from base e to base 2 + metrics.log_scalar( + "text_loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar( + "bart_loss", bart_loss_sum / sample_size / math.log(2), ntokens, 2, round=3 + ) + if sample_size != ntokens: + metrics.log_scalar( + "bart_nll_loss", bart_loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + metrics.log_derived( + "bart_ppl", lambda meters: utils.get_perplexity(meters["bart_nll_loss"].avg) + ) + else: + metrics.log_derived( + "bart_ppl", lambda meters: utils.get_perplexity(meters["bart_loss"].avg) + ) + metrics.log_scalar("bart_wpb", ntokens, priority=180, round=1) + + val_prob_perplexity = 0 + val_code_perplexity = 0 + sample_size_pp = 0 + count_log_cp = 0 + for log in bart_logging_output: + if "loss_prob_perplexity" in log: + val_prob_perplexity = val_prob_perplexity + log["loss_prob_perplexity"] + sample_size_pp = sample_size_pp + log["sample_size"] + if "code_perplexity" in log: + val_code_perplexity = val_code_perplexity + log["code_perplexity"] + count_log_cp = count_log_cp + 1 + if val_prob_perplexity > 0: + metrics.log_scalar("text_loss_prob_perplexity", val_prob_perplexity / sample_size_pp / math.log(2), round=3) + if val_code_perplexity > 0: + metrics.log_scalar("text_code_perplexity", val_code_perplexity / count_log_cp, round=3) + + if task_name == 'speech_pretrain': + hubert_logging_output = logging_outputs_dict[task_name] + loss_sum = sum(log.get("loss", 0) for log in hubert_logging_output) + ntokens = sum(log.get("ntokens", 0) for log in hubert_logging_output) + sample_size = max(1, sum(log.get("sample_size", 0) for log in hubert_logging_output)) + dec_loss_sum = sum(log.get("dec_loss", 0) for log in hubert_logging_output) + l1_loss_sum = sum(log.get("l1_loss", 0) for log in hubert_logging_output) + l2_loss_sum = sum(log.get("l2_loss", 0) for log in hubert_logging_output) + bce_loss_sum = sum(log.get("bce_loss", 0) for log in hubert_logging_output) + ngpu = sum(log.get("ngpu", 0) for log in hubert_logging_output) + + 
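Both the text and speech pre-training aggregations log a `code_perplexity` value produced by the model's quantizer. Assuming the usual definition, the exponentiated entropy of average codebook usage (the quantizer itself sits outside this file, so this is an assumption), the number can be read as how many codes are effectively in use:

```python
import torch

# Average usage probability of each codebook entry (made-up, 4-entry codebook).
usage = torch.tensor([0.5, 0.25, 0.125, 0.125])

# Exponentiated entropy: 4.0 would mean all codes used equally, 1.0 means collapse.
code_ppl = torch.exp(-(usage * (usage + 1e-7).log()).sum())
print(code_ppl.item())  # ~3.36
```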
metrics.log_scalar("hubert_loss", loss_sum / sample_size / math.log(2), sample_size, round=3) + if sample_size != ntokens: + metrics.log_scalar("hubert_nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3) + metrics.log_derived("hubert_ppl", lambda meters: utils.get_perplexity(meters["hubert_nll_loss"].avg)) + else: + metrics.log_derived("hubert_ppl", lambda meters: utils.get_perplexity(meters["hubert_loss"].avg)) + + counts = {} + for lk in hubert_logging_output[0].keys(): + if lk.startswith("count_"): + val = sum(log[lk] for log in hubert_logging_output) + metrics.log_scalar("hubert_" + lk, val) + counts[lk] = val + + for lk in hubert_logging_output[0].keys(): + if lk.startswith("loss_") and lk != 'loss_prob_perplexity': + val = sum(log[lk] for log in hubert_logging_output) + metrics.log_scalar("hubert_" + lk, val / sample_size / math.log(2), round=3) + elif lk.startswith("correct_"): + val = sum(log[lk] for log in hubert_logging_output) + metrics.log_scalar("hubert_" + lk, val / counts[re.sub("correct", "count", lk)]) + # elif lk == 'code_perplexity': + # val = sum(log[lk] for log in hubert_logging_output) + # metrics.log_scalar("hubert_" + lk, val / len(hubert_logging_output), round=3) + + val_prob_perplexity = 0 + val_code_perplexity = 0 + sample_size_pp = 0 + count_log_cp = 0 + for log in hubert_logging_output: + if "loss_prob_perplexity" in log: + val_prob_perplexity = val_prob_perplexity + log["loss_prob_perplexity"] + sample_size_pp = sample_size_pp + log["sample_size"] + if "code_perplexity" in log: + val_code_perplexity = val_code_perplexity + log["code_perplexity"] + count_log_cp = count_log_cp + 1 + if val_prob_perplexity > 0: + metrics.log_scalar("hubert_loss_prob_perplexity", val_prob_perplexity / sample_size_pp / math.log(2), round=3) + if val_code_perplexity > 0: + metrics.log_scalar("hubert_code_perplexity", val_code_perplexity / count_log_cp, round=3) + + metrics.log_scalar( + "hubert_dec_loss", dec_loss_sum / ngpu, sample_size, 2, round=5 + ) + metrics.log_scalar( + "hubert_l1_loss", l1_loss_sum / ngpu, sample_size, 2, round=5 + ) + metrics.log_scalar( + "hubert_l2_loss", l2_loss_sum / ngpu, sample_size, 2, round=5 + ) + metrics.log_scalar( + "hubert_bce_loss", bce_loss_sum / ngpu, sample_size, 2, round=5 + ) + if "enc_dec_attn_loss" in hubert_logging_output[0]: + enc_dec_attn_loss_sum = sum(log.get("enc_dec_attn_loss", 0) for log in hubert_logging_output) + metrics.log_scalar( + "hubert_enc_dec_attn_loss", enc_dec_attn_loss_sum / ngpu, sample_size, round=8 + ) + metrics.log_scalar("hubert_wpb", ntokens, priority=180, round=1) + + loss = sum(log.get("loss", 0) for log in logging_outputs) + sample_size = max(1, sum(log.get("sample_size", 0) for log in logging_outputs)) + metrics.log_scalar( + "loss", loss / sample_size, sample_size, 1, round=5 + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. 
+ """ + return False diff --git a/SpeechT5/SpeechT5/speecht5/criterions/text_pretrain_criterion.py b/SpeechT5/SpeechT5/speecht5/criterions/text_pretrain_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..089efcd7a0792aeda8410d9d5bbc9c3e313e2271 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/criterions/text_pretrain_criterion.py @@ -0,0 +1,144 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import math +from dataclasses import dataclass, field +from typing import List, Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.dataclass import FairseqDataclass +from omegaconf import II + + +@dataclass +class TextPretrainCriterionConfig(FairseqDataclass): + sentence_avg: bool = II("optimization.sentence_avg") + loss_weights: Optional[List[float]] = field( + default_factory=lambda: [0.1,], + metadata={"help": "weights for additional loss terms (not first one)"}, + ) + bart_weight: float = field( + default=1.0, + metadata={"help": "loss weight for cross entropy"}, + ) + + +class TextPretrainCriterion(FairseqCriterion): + def __init__(self, task, sentence_avg, bart_weight, loss_weights=None): + super().__init__(task) + self.sentence_avg = sentence_avg + self.loss_weights = loss_weights + self.bart_weight = bart_weight + + def forward(self, model, sample, reduce=True): + """Compute the loss for the given sample. 
+ + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + net_output, codebook_out, encoder_output = model(**sample["net_input"]) + bart_loss, _ = self.compute_loss(model, net_output, sample, reduce=reduce) + sample_size = ( + sample["target"].size(0) if self.sentence_avg else sample["ntokens"] + ) + + loss = self.bart_weight * bart_loss + logging_output = { + "loss": loss.item(), + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + "bart_loss": bart_loss.item(), + "sample_size": sample_size, + } + + if "prob_perplexity" in codebook_out: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(codebook_out) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + if len(self.loss_weights) > len(extra_losses): + modified_loss_weight = self.loss_weights[len(extra_losses):] + else: + modified_loss_weight = self.loss_weights + + # assert len(extra_losses) == len(self.loss_weights), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, modified_loss_weight): + # print(n + str(coef)) + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss += p + logging_output[f"loss_{n}"] = p.item() + + if 'loss_prob_perplexity' in logging_output: + logging_output['code_perplexity'] = codebook_out['code_perplexity'].item() + + return loss, sample_size, logging_output + + def compute_loss(self, model, net_output, sample, reduce=True): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + lprobs = lprobs.view(-1, lprobs.size(-1)) + target = model.get_targets(sample, net_output).view(-1) + loss = F.nll_loss( + lprobs, + target, + ignore_index=self.padding_idx, + reduction="sum" if reduce else "none", + ) + return loss, loss + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + bart_loss_sum = sum(log.get("bart_loss", 0) for log in logging_outputs) + + # we divide by log(2) to convert the loss from base e to base 2 + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar( + "bart_loss", bart_loss_sum / sample_size / math.log(2), ntokens, 2, round=3 + ) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", bart_loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg) + ) + else: + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["bart_loss"].avg) + ) + + if "loss_prob_perplexity" in logging_outputs[0].keys(): + val = sum(log["loss_prob_perplexity"] for log in logging_outputs) + metrics.log_scalar("loss_prob_perplexity", val / sample_size / math.log(2), round=3) + if "code_perplexity" in logging_outputs[0].keys(): + val = sum(log["code_perplexity"] for log in logging_outputs) + metrics.log_scalar("code_perplexity", val / len(logging_outputs), round=3) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs 
returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/SpeechT5/SpeechT5/speecht5/criterions/text_to_speech_loss.py b/SpeechT5/SpeechT5/speecht5/criterions/text_to_speech_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..de2034ca0ce32a622b9bc1412cffbdca4fe84394 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/criterions/text_to_speech_loss.py @@ -0,0 +1,427 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +from dataclasses import dataclass, field + +import torch +from fairseq import metrics, utils +from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.dataclass import FairseqDataclass +from speecht5.models.modules.speech_encoder_prenet import SpeechEncoderPrenet +from espnet.nets.pytorch_backend.e2e_tts_tacotron2 import GuidedAttentionLoss +from omegaconf import II +from typing import Any + + +@dataclass +class TexttoSpeechLossConfig(FairseqDataclass): + use_masking: bool = field( + default=True, + metadata={"help": "Whether to use masking in calculation of loss"}, + ) + use_weighted_masking: bool = field( + default=False, + metadata={"help": "Whether to use weighted masking in calculation of loss"}, + ) + loss_type: str = field( + default="L1", + metadata={"help": "How to calc loss"}, + ) + bce_pos_weight: float = field( + default=5.0, + metadata={"help": "Positive sample weight in BCE calculation (only for use-masking=True)"}, + ) + bce_loss_lambda: float = field( + default=1.0, + metadata={"help": "Lambda in bce loss"}, + ) + use_guided_attn_loss: bool = field( + default=False, + metadata={"help": "Whether to use guided attention loss"}, + ) + guided_attn_loss_sigma: float = field( + default=0.4, + metadata={"help": "Sigma in guided attention loss"}, + ) + guided_attn_loss_lambda: float = field( + default=10.0, + metadata={"help": "Lambda in guided attention loss"}, + ) + num_layers_applied_guided_attn: int = field( + default=2, + metadata={"help": "Number of layers to be applied guided attention loss, if set -1, all of the layers will be applied."}, + ) + num_heads_applied_guided_attn: int = field( + default=2, + metadata={"help": "Number of heads in each layer to be applied guided attention loss, if set -1, all of the heads will be applied."}, + ) + modules_applied_guided_attn: Any = field( + default=("encoder-decoder",), + metadata={"help": "Module name list to be applied guided attention loss"}, + ) + sentence_avg: bool = II("optimization.sentence_avg") + + +class TexttoSpeechLoss(FairseqCriterion): + def __init__( + self, + task, + sentence_avg, + use_masking=True, + use_weighted_masking=False, + loss_type="L1", + bce_pos_weight=5.0, + bce_loss_lambda=1.0, + use_guided_attn_loss=False, + guided_attn_loss_sigma=0.4, + guided_attn_loss_lambda=1.0, + num_layers_applied_guided_attn=2, + num_heads_applied_guided_attn=2, + 
modules_applied_guided_attn=["encoder-decoder"], + ): + super().__init__(task) + self.sentence_avg = sentence_avg + self.use_masking = use_masking + self.use_weighted_masking = use_weighted_masking + self.loss_type = loss_type + self.bce_pos_weight = bce_pos_weight + self.bce_loss_lambda = bce_loss_lambda + self.use_guided_attn_loss = use_guided_attn_loss + self.guided_attn_loss_sigma = guided_attn_loss_sigma + self.guided_attn_loss_lambda = guided_attn_loss_lambda + # define loss function + self.criterion = Tacotron2Loss( + use_masking=use_masking, + use_weighted_masking=use_weighted_masking, + bce_pos_weight=bce_pos_weight, + ) + if self.use_guided_attn_loss: + self.num_layers_applied_guided_attn = num_layers_applied_guided_attn + self.num_heads_applied_guided_attn = num_heads_applied_guided_attn + self.modules_applied_guided_attn = modules_applied_guided_attn + if self.use_guided_attn_loss: + self.attn_criterion = GuidedMultiHeadAttentionLoss( + sigma=guided_attn_loss_sigma, + alpha=guided_attn_loss_lambda, + ) + + def forward(self, model, sample): + """Compute the loss for the given sample. + + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + net_output = model(**sample["net_input"]) + loss, l1_loss, l2_loss, bce_loss, enc_dec_attn_loss = self.compute_loss(model, net_output, sample) + # sample_size = ( + # sample["target"].size(0) if self.sentence_avg else sample["nframes"] + # ) + sample_size = 1 + logging_output = { + "loss": loss.item(), + "l1_loss": l1_loss.item(), + "l2_loss": l2_loss.item(), + "bce_loss": bce_loss.item(), + "sample_size": 1, + "ntokens": sample["ntokens"], + "nsentences": sample["target"].size(0), + } + + if enc_dec_attn_loss is not None: + logging_output['enc_dec_attn_loss'] = enc_dec_attn_loss.item() + + if hasattr(model, 'text_encoder_prenet'): + logging_output["encoder_alpha"] = model.text_encoder_prenet.encoder_prenet[-1].alpha.item() + logging_output["decoder_alpha"] = model.speech_decoder_prenet.decoder_prenet[-1].alpha.item() + elif hasattr(model, "speech_encoder_prenet"): + logging_output["decoder_alpha"] = model.speech_decoder_prenet.decoder_prenet[-1].alpha.item() + else: + if 'task' not in sample: + logging_output["encoder_alpha"] = model.encoder_prenet.encoder_prenet[-1].alpha.item() + logging_output["decoder_alpha"] = model.decoder_prenet.decoder_prenet[-1].alpha.item() + + return loss, sample_size, logging_output + + def compute_loss(self, model, net_output, sample): + before_outs, after_outs, logits, attn = net_output + labels = sample["labels"] + ys = sample["dec_target"] + olens = sample["dec_target_lengths"] + ilens = sample["src_lengths"] + + # modifiy mod part of groundtruth + if model.reduction_factor > 1: + olens_in = olens.new([torch.div(olen, model.reduction_factor, rounding_mode='floor') for olen in olens]) + olens = olens.new([olen - olen % model.reduction_factor for olen in olens]) + max_olen = max(olens) + ys = ys[:, :max_olen] + labels = labels[:, :max_olen] + labels = torch.scatter(labels, 1, (olens - 1).unsqueeze(1), 1.0) # make sure at least one frame has 1 + # labels[:, -1] = 1.0 + else: + olens_in = olens + + # caluculate loss values + l1_loss, l2_loss, bce_loss = self.criterion( + after_outs, before_outs, logits, ys, labels, olens + ) + + # l1_loss = l1_loss / ys.size(2) + # l2_loss = l2_loss / ys.size(2) + + if self.loss_type == "L1": + loss = l1_loss + self.bce_loss_lambda * bce_loss if 
self.bce_loss_lambda > 0.0 else l1_loss + elif self.loss_type == "L2": + loss = l2_loss + self.bce_loss_lambda * bce_loss if self.bce_loss_lambda > 0.0 else l2_loss + elif self.loss_type == "L1+L2": + loss = l1_loss + l2_loss + self.bce_loss_lambda * bce_loss if self.bce_loss_lambda > 0.0 else l1_loss + l2_loss + else: + raise ValueError("unknown --loss-type " + self.loss_type) + + # calculate guided attention loss + enc_dec_attn_loss = None + if self.use_guided_attn_loss: + # calculate the input lengths of encoder, which is determined by encoder prenet + if hasattr(model, 'encoder_reduction_factor') and model.encoder_reduction_factor > 1: + ilens_in = ilens.new([ilen // model.encoder_reduction_factor for ilen in ilens]) + else: + ilens_in = ilens + # work for speech to speech model's input + if "task_name" in sample and sample["task_name"] == "s2s": + m = None + if hasattr(model, 'encoder_prenet'): + m = model.encoder_prenet + elif hasattr(model, 'speech_encoder_prenet'): + m = model.speech_encoder_prenet + if m is not None and isinstance(m, SpeechEncoderPrenet): + ilens_in = m.get_src_lengths(ilens_in) + # calculate for encoder-decoder + if "encoder-decoder" in self.modules_applied_guided_attn: + attn = [att_l[:, : self.num_heads_applied_guided_attn] for att_l in attn] + att_ws = torch.cat(attn, dim=1) # (B, H*L, T_out, T_in) + enc_dec_attn_loss = self.attn_criterion(att_ws, ilens_in, olens_in) + loss = loss + enc_dec_attn_loss + + return loss, l1_loss, l2_loss, bce_loss, enc_dec_attn_loss + + @classmethod + def reduce_metrics(cls, logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + l1_loss_sum = sum(log.get("l1_loss", 0) for log in logging_outputs) + l2_loss_sum = sum(log.get("l2_loss", 0) for log in logging_outputs) + bce_loss_sum = sum(log.get("bce_loss", 0) for log in logging_outputs) + sample_size = max(1, sum(log.get("sample_size", 0) for log in logging_outputs)) + metrics.log_scalar( + "loss", loss_sum / sample_size, sample_size, 1, round=5 + ) + encoder_alpha_sum = sum(log.get("encoder_alpha", 0) for log in logging_outputs) + decoder_alpha_sum = sum(log.get("decoder_alpha", 0) for log in logging_outputs) + ngpu = sum(log.get("ngpu", 0) for log in logging_outputs) + + metrics.log_scalar( + "l1_loss", l1_loss_sum / sample_size, sample_size, 2, round=5 + ) + metrics.log_scalar( + "l2_loss", l2_loss_sum / sample_size, sample_size, 2, round=5 + ) + metrics.log_scalar( + "bce_loss", bce_loss_sum / sample_size, sample_size, 2, round=5 + ) + metrics.log_scalar( + "encoder_alpha", encoder_alpha_sum / sample_size, sample_size, round=5 + ) + metrics.log_scalar( + "decoder_alpha", decoder_alpha_sum / sample_size, sample_size, round=5 + ) + + if "enc_dec_attn_loss" in logging_outputs[0]: + enc_dec_attn_loss_sum = sum(log.get("enc_dec_attn_loss", 0) for log in logging_outputs) + metrics.log_scalar( + "enc_dec_attn_loss", enc_dec_attn_loss_sum / sample_size, sample_size, round=8 + ) + + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True + +class Tacotron2Loss(torch.nn.Module): + """Loss function module for Tacotron2.""" + + def __init__( + self, use_masking=True, use_weighted_masking=False, bce_pos_weight=20.0 + ): + """Initialize Tactoron2 loss module. 
+ + Args: + use_masking (bool): Whether to apply masking + for padded part in loss calculation. + use_weighted_masking (bool): + Whether to apply weighted masking in loss calculation. + bce_pos_weight (float): Weight of positive sample of stop token. + + """ + super(Tacotron2Loss, self).__init__() + assert (use_masking != use_weighted_masking) or not use_masking + self.use_masking = use_masking + self.use_weighted_masking = use_weighted_masking + + # define criterions + # reduction = "none" if self.use_weighted_masking else "sum" + reduction = "none" if self.use_weighted_masking else "mean" + self.l1_criterion = torch.nn.L1Loss(reduction=reduction) + self.mse_criterion = torch.nn.MSELoss(reduction=reduction) + self.bce_criterion = torch.nn.BCEWithLogitsLoss( + reduction=reduction, pos_weight=torch.tensor(bce_pos_weight) + ) + + # NOTE(kan-bayashi): register pre hook function for the compatibility + self._register_load_state_dict_pre_hook(self._load_state_dict_pre_hook) + + def forward(self, after_outs, before_outs, logits, ys, labels, olens): + """Calculate forward propagation. + + Args: + after_outs (Tensor): Batch of outputs after postnets (B, Lmax, odim). + before_outs (Tensor): Batch of outputs before postnets (B, Lmax, odim). + logits (Tensor): Batch of stop logits (B, Lmax). + ys (Tensor): Batch of padded target features (B, Lmax, odim). + labels (LongTensor): Batch of the sequences of stop token labels (B, Lmax). + olens (LongTensor): Batch of the lengths of each target (B,). + + Returns: + Tensor: L1 loss value. + Tensor: Mean square error loss value. + Tensor: Binary cross entropy loss value. + + """ + # make mask and apply it + if self.use_masking: + masks = make_non_pad_mask(olens).unsqueeze(-1).to(ys.device) + ys = ys.masked_select(masks) + after_outs = after_outs.masked_select(masks) + before_outs = before_outs.masked_select(masks) + labels = labels.masked_select(masks[:, :, 0]) + logits = logits.masked_select(masks[:, :, 0]) + + # calculate loss + l1_loss = self.l1_criterion(after_outs, ys) + self.l1_criterion(before_outs, ys) + mse_loss = self.mse_criterion(after_outs, ys) + self.mse_criterion( + before_outs, ys + ) + bce_loss = self.bce_criterion(logits, labels) + + # make weighted mask and apply it + if self.use_weighted_masking: + masks = make_non_pad_mask(olens).unsqueeze(-1).to(ys.device) + weights = masks.float() / masks.sum(dim=1, keepdim=True).float() + out_weights = weights.div(ys.size(0) * ys.size(2)) + logit_weights = weights.div(ys.size(0)) + + # apply weight + l1_loss = l1_loss.mul(out_weights).masked_select(masks).sum() + mse_loss = mse_loss.mul(out_weights).masked_select(masks).sum() + bce_loss = ( + bce_loss.mul(logit_weights.squeeze(-1)) + .masked_select(masks.squeeze(-1)) + .sum() + ) + + return l1_loss, mse_loss, bce_loss + + def _load_state_dict_pre_hook( + self, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + """Apply pre hook fucntion before loading state dict. + + From v.0.6.1 `bce_criterion.pos_weight` param is registered as a parameter but + old models do not include it and as a result, it causes missing key error when + loading old model parameter. This function solve the issue by adding param in + state dict before loading as a pre hook function + of the `load_state_dict` method. 
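`Tacotron2Loss` above restricts its L1/MSE/BCE terms to valid frames by building a non-padding mask from the target lengths and selecting only those positions. A compact stand-in for that masking step, written without the espnet `make_non_pad_mask` dependency; all shapes are illustrative:

```python
import torch
import torch.nn.functional as F

B, Lmax, odim = 2, 6, 4
olens = torch.tensor([6, 3])                 # true output lengths per utterance
ys = torch.randn(B, Lmax, odim)              # padded target features
outs = torch.randn(B, Lmax, odim)            # padded predictions

# (B, Lmax, 1) mask that is True on real frames and False on padding.
masks = (torch.arange(Lmax)[None, :] < olens[:, None]).unsqueeze(-1)

l1 = F.l1_loss(outs.masked_select(masks), ys.masked_select(masks))
print(l1)
```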
+ + """ + key = prefix + "bce_criterion.pos_weight" + if key not in state_dict: + state_dict[key] = self.bce_criterion.pos_weight + +class GuidedMultiHeadAttentionLoss(GuidedAttentionLoss): + """Guided attention loss function module for multi head attention. + Args: + sigma (float, optional): Standard deviation to control + how close attention to a diagonal. + alpha (float, optional): Scaling coefficient (lambda). + reset_always (bool, optional): Whether to always reset masks. + """ + + def forward(self, att_ws, ilens, olens): + """Calculate forward propagation. + Args: + att_ws (Tensor): + Batch of multi head attention weights (B, H, T_max_out, T_max_in). + ilens (LongTensor): Batch of input lenghts (B,). + olens (LongTensor): Batch of output lenghts (B,). + Returns: + Tensor: Guided attention loss value. + """ + if self.guided_attn_masks is None: + self.guided_attn_masks = ( + self._make_guided_attention_masks(ilens, olens) + .to(att_ws.device) + .unsqueeze(1) + ) + if self.masks is None: + self.masks = self._make_masks(ilens, olens).to(att_ws.device).unsqueeze(1) + losses = self.guided_attn_masks * att_ws + loss = torch.mean(losses.masked_select(self.masks)) + if self.reset_always: + self._reset_masks() + + return self.alpha * loss + + def _make_guided_attention_masks(self, ilens, olens): + n_batches = len(ilens) + max_ilen = max(ilens) + max_olen = max(olens) + guided_attn_masks = torch.zeros((n_batches, max_olen, max_ilen), device=olens.device) + for idx, (ilen, olen) in enumerate(zip(ilens, olens)): + guided_attn_masks[idx, :olen, :ilen] = self._make_guided_attention_mask( + ilen, olen, self.sigma + ) + return guided_attn_masks + + @staticmethod + def _make_guided_attention_mask(ilen, olen, sigma): + grid_x, grid_y = torch.meshgrid(torch.arange(olen, device=olen.device), torch.arange(ilen, device=olen.device)) + grid_x, grid_y = grid_x.float(), grid_y.float() + return 1.0 - torch.exp( + -((grid_y / ilen - grid_x / olen) ** 2) / (2 * (sigma**2)) + ) + + @staticmethod + def _make_masks(ilens, olens): + in_masks = make_non_pad_mask(ilens).to(ilens.device) # (B, T_in) + out_masks = make_non_pad_mask(olens).to(olens.device) # (B, T_out) + return out_masks.unsqueeze(-1) & in_masks.unsqueeze(-2) # (B, T_out, T_in) diff --git a/SpeechT5/SpeechT5/speecht5/data/__init__.py b/SpeechT5/SpeechT5/speecht5/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SpeechT5/SpeechT5/speecht5/data/__pycache__/__init__.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/data/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e0226e62dea15a1e80ce9e560a394d3a32257e5 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/data/__pycache__/__init__.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/data/__pycache__/multitask_dataset.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/data/__pycache__/multitask_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa00a08940d484cd890310682f8b426b6b563663 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/data/__pycache__/multitask_dataset.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_dataset.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40e0db79129b361eac237b86739f11db827a09d9 Binary files /dev/null and 
b/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_dataset.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_to_class_dataset.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_to_class_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1c9dbfac7e10d58af0c345770d75978b07dc40d3 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_to_class_dataset.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_to_speech_dataset.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_to_speech_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..026a1e64f1d1d9337f420e53d697ace338bb8c51 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_to_speech_dataset.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_to_text_dataset.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_to_text_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..624ef9f3e92c71946eb8df22e611ec0864fb9aab Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/data/__pycache__/speech_to_text_dataset.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/data/__pycache__/text_dataset.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/data/__pycache__/text_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd6c6453d30e3bf4f5533a09025f87a7172c2868 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/data/__pycache__/text_dataset.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/data/__pycache__/text_to_speech_dataset.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/data/__pycache__/text_to_speech_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d66f223538760d93bea728d59df3753671668e6 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/data/__pycache__/text_to_speech_dataset.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/data/multitask_dataset.py b/SpeechT5/SpeechT5/speecht5/data/multitask_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..65e13cf0e9b640bf94618e4c746f5d37fbcac3ee --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/data/multitask_dataset.py @@ -0,0 +1,265 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import bisect + +import logging +import numpy as np +from torch.utils.data.dataloader import default_collate +from fairseq.data import data_utils + +from fairseq.data.fairseq_dataset import FairseqDataset + +logger = logging.getLogger(__name__) + +class MultitaskDataset(FairseqDataset): + @staticmethod + def cumsum(sequence): + r, s = [], 0 + for e in sequence: + curr_len = len(e) + r.append(curr_len + s) + s += curr_len + return r + + def __init__(self, datasets, sample_ratios=1, batch_ratio=None): + super(MultitaskDataset, self).__init__() + assert len(datasets) > 0, "datasets should not be an empty 
iterable" + self.datasets = list(datasets) + if isinstance(sample_ratios, int): + sample_ratios = [sample_ratios] * len(self.datasets) + if batch_ratio is not None: + logger.info('batch ratio is ' + str(batch_ratio)) + self.batch_ratio = batch_ratio + else: + self.batch_ratio = None + else: + logger.info('set sample ratio to ' + str(sample_ratios)) + if batch_ratio is not None: + logger.info('batch ratio is ' + str(batch_ratio)) + self.batch_ratio = batch_ratio + else: + self.batch_ratio = None + self.sample_ratios = sample_ratios + self._ordered_indices = None + self._update_size() + + def __len__(self): + return self.cumulative_sizes[-1] + + def __getitem__(self, idx): + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + sample = self.datasets[dataset_idx][sample_idx] + if isinstance(sample, dict): + sample["dataset_idx"] = dataset_idx + else: + sample = sample + (dataset_idx,) + return sample + + def _update_size(self): + self.cumulative_sizes = self.cumsum(self.datasets) + self.real_sizes = [len(d) for d in self.datasets] + + def _get_dataset_and_sample_index(self, idx: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + sample_idx = sample_idx % self.real_sizes[dataset_idx] + return dataset_idx, sample_idx + + def collater(self, samples, **extra_args): + # For now only supports datasets with same underlying collater implementations + if samples is not None and len(samples) > 0: + if isinstance(samples[0], dict): + dataset_idx = samples[0]["dataset_idx"] + else: + dataset_idx = samples[0][-1] + samples = [sample[:-1] for sample in samples] + else: + dataset_idx = 0 + + if hasattr(self.datasets[dataset_idx], "collater"): + return self.datasets[dataset_idx].collater(samples, **extra_args) + else: + return default_collate(samples, **extra_args) + + def size(self, idx: int): + """ + Return an example's size as a float or tuple. + """ + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + return self.datasets[dataset_idx].size(sample_idx) + + def num_tokens(self, index: int): + return np.max(self.size(index)) + + def attr(self, attr: str, index: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, index) + return getattr(self.datasets[dataset_idx], attr, None) + + @property + def sizes(self): + _dataset_sizes = [] + for ds in self.datasets: + if isinstance(ds.sizes, np.ndarray): + _dataset_sizes.append(ds.sizes) + else: + # Only support underlying dataset with single size array. + assert isinstance(ds.sizes, list) + _dataset_sizes.append(ds.sizes[0]) + return np.concatenate(_dataset_sizes) + + @property + def supports_prefetch(self): + return all(d.supports_prefetch for d in self.datasets) + + def ordered_indices(self): + # ordered_indices = [] + # for i, dataset in enumerate(self.datasets): + # indice = dataset.ordered_indices() + # ordered_indices.append(indice) + if self._ordered_indices is None: + # Call the underlying dataset's ordered_indices() here, so that we + # get the same random ordering as we would have from using the + # underlying sub-datasets directly. 
+ self._ordered_indices = [ + dataset.ordered_indices() + for dataset in self.datasets + ] + return np.arange(len(self)) + + def prefetch(self, indices): + frm = 0 + for to, ds in zip(self.cumulative_sizes, self.datasets): + real_size = len(ds) + if getattr(ds, "supports_prefetch", False): + ds.prefetch([(i - frm) % real_size for i in indices if frm <= i < to]) + frm = to + + def batch_by_size( + self, + indices, + max_tokens=None, + max_sentences=None, + required_batch_size_multiple=1, + ): + if not hasattr(self, "max_tokens"): + self.max_tokens = max_tokens + if not hasattr(self, "max_sentences"): + self.max_sentences = max_sentences + if not hasattr(self, "required_batch_size_multiple"): + self.required_batch_size_multiple = required_batch_size_multiple + batch_samplers = [] + for i, dataset in enumerate(self.datasets): + batch_sampler = dataset.batch_by_size( + self._ordered_indices[i], + max_tokens=max_tokens if self.batch_ratio is None else max_tokens * self.batch_ratio[i], + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + ) + if i > 0: + for batch in batch_sampler: + batch += self.cumulative_sizes[i - 1] + if self.sample_ratios[i] != 1.0: + batch_sampler = np.array(batch_sampler) + batch_sampler = np.random.choice(batch_sampler, int(len(batch_sampler) * self.sample_ratios[i])) + batch_sampler = list(batch_sampler) + logger.info('Adjust batch by ratio ' + str(self.sample_ratios[i]) + ' and the number of batch is ' + str(int(len(batch_sampler))) + ' for dataset ' + str(i)) + batch_samplers.extend(batch_sampler) + return batch_samplers + + def filter_indices_by_size(self, indices, max_positions): + """ + Filter each sub-dataset independently, then update the round robin to work + on the filtered sub-datasets. + """ + if not hasattr(self, "max_positions"): + self.max_positions = max_positions + ignored_some = False + for i in range(len(self.datasets)): + # ignored = [] + self._ordered_indices[i], ignored = self.datasets[i].filter_indices_by_size( + self._ordered_indices[i], self.max_positions[i] + ) + if len(ignored) > 0: + ignored_some = True + logger.warning( + f"{len(ignored)} samples from {i} have invalid sizes and will be skipped, " + f"max_positions={self.max_positions[i]}, first few sample ids={ignored[:10]}" + ) + + logger.info('update dataset size') + self._update_size() + + # Since we are modifying in place the _ordered_indices, + # it's not possible anymore to return valid ignored indices. + # Hopefully the extra debug information print above should be enough to debug. + # Ideally we would receive ignore_invalid_inputs so that we could have + # a proper error message. 
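+        # Return a fresh flat index range over the (now re-sized) concatenated
+        # datasets; the dummy [0] merely signals to the caller that at least one
+        # sample was filtered out.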
+ return (np.arange(len(self)), [0] if ignored_some else []) + + @property + def can_reuse_epoch_itr_across_epochs(self): + return all(d.can_reuse_epoch_itr_across_epochs for d in self.datasets) + + def set_epoch(self, epoch): + super().set_epoch(epoch) + for ds in self.datasets: + if hasattr(ds, "set_epoch"): + ds.set_epoch(epoch) + + def shuffle_batches(self, batches, seed): + logger.info("shuffle batches") + new_batches_fromlist = [] + new_batches_notlist = [] + new_batches = [] + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + for batch in batches: + if isinstance(batch, list): + # np.random.shuffle(batch) + new_batches_fromlist.append(batch) + else: + new_batches_notlist.append(batch) + logger.info("Get " + str(len(new_batches_fromlist)) + " chunk from speech sides") + logger.info("Get " + str(sum([len(batch_list) for batch_list in new_batches_fromlist])) + " batches from speech sides") + logger.info("Get " + str(len(new_batches_notlist)) + " batches from text sides") + if len(new_batches_fromlist) == 0: + return new_batches_notlist + st_ratio = int(len(new_batches_notlist) / len(new_batches_fromlist)) + logger.info("Get st_ratio " + str(st_ratio)) + last_idx = 0 + for i in range(len(new_batches_fromlist)): + if i == len(new_batches_fromlist) - 1: + new_batches_fromlist[i].extend(new_batches_notlist[last_idx:]) + else: + new_batches_fromlist[i].extend(new_batches_notlist[last_idx : last_idx + st_ratio]) + np.random.shuffle(new_batches_fromlist[i]) + new_batches.extend(new_batches_fromlist[i]) + last_idx = last_idx + st_ratio + logger.info("Finish shuffle") + return new_batches + + def reset_batch_sampler(self): + logger.info("reset batch sampler") + self._ordered_indices = [ + self.datasets[i].ordered_indices() + for i in range(len(self.datasets)) + ] + self.filter_indices_by_size(None, None) + + batch_samplers = self.batch_by_size( + None, + self.max_tokens, + self.max_sentences, + self.required_batch_size_multiple + ) + return batch_samplers diff --git a/SpeechT5/SpeechT5/speecht5/data/speech_dataset.py b/SpeechT5/SpeechT5/speecht5/data/speech_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c339ee1b0b195abace434e67b9d8518d596de2d6 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/data/speech_dataset.py @@ -0,0 +1,476 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import itertools +import logging +import os +import sys +from typing import Any, List, Optional, Union + +import numpy as np + +import torch +import torch.nn.functional as F +import librosa +from fairseq.data.audio.speech_to_text_dataset import get_features_or_waveform +from fairseq.data import data_utils +from fairseq.data.fairseq_dataset import FairseqDataset + +logger = logging.getLogger(__name__) + +def _collate_frames( + frames: List[torch.Tensor], is_audio_input: bool = False +): + """ + Convert a list of 2D frames into a padded 3D tensor + Args: + frames (list): list of 2D frames of size L[i]*f_dim. 
Where L[i] is + length of i-th frame and f_dim is static dimension of features + Returns: + 3D tensor of size len(frames)*len_max*f_dim where len_max is max of L[i] + """ + max_len = max(frame.size(0) for frame in frames) + if is_audio_input: + out = frames[0].new_zeros((len(frames), max_len)) + else: + out = frames[0].new_zeros((len(frames), max_len, frames[0].size(1))) + for i, v in enumerate(frames): + out[i, : v.size(0)] = v + return out + +def add_first_frame_and_remove_last_frame(ys): + ys_in = torch.cat( + [ys.new_zeros((ys.shape[0], 1, ys.shape[2])), ys[:, :-1]], dim=1 + ) + return ys_in + +def load_audio(manifest_path, max_keep, min_keep): + n_long, n_short = 0, 0 + names, inds, sizes, spk_embeds = [], [], [], [] + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + assert len(items) == 3, line + sz = int(items[1]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + names.append(items[0]) + spk_embeds.append(items[2]) + inds.append(ind) + sizes.append(sz) + tot = ind + 1 + logger.info( + ( + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, names, inds, tot, sizes, spk_embeds + + +def load_label(label_path, inds, tot): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds, tot): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + assert ( + len(code_lengths) == tot + ), f"number of labels does not match ({len(code_lengths)} != {tot})" + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + +def verify_label_lengths( + audio_sizes, + audio_rate, + label_path, + label_rate, + inds, + tot, + tol=0.1, # tolerance in seconds +): + if label_rate < 0: + logger.info(f"{label_path} is sequence label. skipped") + return + + with open(label_path) as f: + lengths = [len(line.rstrip().split()) for line in f] + assert len(lengths) == tot + lengths = [lengths[i] for i in inds] + num_invalid = 0 + for i, ind in enumerate(inds): + dur_from_audio = audio_sizes[i] / audio_rate + dur_from_label = lengths[i] / label_rate + if abs(dur_from_audio - dur_from_label) > tol: + logger.warning( + ( + f"audio and label duration differ too much " + f"(|{dur_from_audio} - {dur_from_label}| > {tol}) " + f"in line {ind+1} of {label_path}. Check if `label_rate` " + f"is correctly set (currently {label_rate}). " + f"num. of samples = {audio_sizes[i]}; " + f"label length = {lengths[i]}" + ) + ) + num_invalid += 1 + if num_invalid > 0: + logger.warning( + f"total {num_invalid} (audio, label) pairs with mismatched lengths" + ) + + +def logmelfilterbank( + audio, + sampling_rate, + fft_size=1024, + hop_size=256, + win_length=None, + window="hann", + num_mels=80, + fmin=80, + fmax=7600, + eps=1e-10, +): + """Compute log-Mel filterbank feature. + (https://github.com/kan-bayashi/ParallelWaveGAN/blob/master/parallel_wavegan/bin/preprocess.py) + + Args: + audio (ndarray): Audio signal (T,). + sampling_rate (int): Sampling rate. + fft_size (int): FFT size. + hop_size (int): Hop size. 
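+            (A hop of 256 samples corresponds to a 16 ms frame shift, i.e. 62.5 frames/s, at the 16 kHz rate passed in by get_audio.)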
+ win_length (int): Window length. If set to None, it will be the same as fft_size. + window (str): Window function type. + num_mels (int): Number of mel basis. + fmin (int): Minimum frequency in mel basis calculation. + fmax (int): Maximum frequency in mel basis calculation. + eps (float): Epsilon value to avoid inf in log calculation. + + Returns: + ndarray: Log Mel filterbank feature (#frames, num_mels). + + """ + # get amplitude spectrogram + x_stft = librosa.stft(audio, n_fft=fft_size, hop_length=hop_size, + win_length=win_length, window=window, pad_mode="reflect") + spc = np.abs(x_stft).T # (#frames, #bins) + + # get mel basis + fmin = 0 if fmin is None else fmin + fmax = sampling_rate / 2 if fmax is None else fmax + mel_basis = librosa.filters.mel(sr=sampling_rate, n_fft=fft_size, n_mels=num_mels, fmin=fmin, fmax=fmax) + + return np.log10(np.maximum(eps, np.dot(spc, mel_basis.T))) + + +class SpeechPretrainDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + random_crop: bool = False, + single_target: bool = False, + reduction_factor: int = 1, + ): + self.audio_root, self.audio_names, inds, tot, self.sizes, self.spk_embeds = load_audio( + manifest_path, max_keep_sample_size, min_keep_sample_size + ) + self.sample_rate = sample_rate + self.shuffle = shuffle + self.random_crop = random_crop + + self.num_labels = len(label_paths) + self.pad_list = pad_list + self.eos_list = eos_list + self.label_processors = label_processors + self.single_target = single_target + self.label_rates = ( + [label_rates for _ in range(len(label_paths))] + if isinstance(label_rates, float) + else label_rates + ) + self.store_labels = store_labels + if store_labels: + self.label_list = [load_label(p, inds, tot) for p in label_paths] + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds, tot) for p in label_paths + ] + assert label_processors is None or len(label_processors) == self.num_labels + for label_path, label_rate in zip(label_paths, self.label_rates): + verify_label_lengths( + self.sizes, sample_rate, label_path, label_rate, inds, tot + ) + + self.max_sample_size = ( + max_sample_size if max_sample_size is not None else sys.maxsize + ) + self.pad_audio = pad_audio + self.normalize = normalize + self.reduction_factor = reduction_factor + logger.info( + f"pad_audio={pad_audio}, random_crop={random_crop}, reduction_factor={reduction_factor}, " + f"normalize={normalize}, max_sample_size={self.max_sample_size}" + ) + + def get_audio(self, index): + import soundfile as sf + + wav_path = os.path.join(self.audio_root, self.audio_names[index]) + wav, cur_sample_rate = sf.read(wav_path) + wav = torch.from_numpy(wav).float() + fbank = logmelfilterbank( + wav.view(-1).cpu().numpy(), 16000 + ) + fbank = torch.from_numpy(fbank).float() + wav = self.postprocess(wav, cur_sample_rate) + return wav, fbank + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = 
self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + def __getitem__(self, index): + wav, fbank = self.get_audio(index) + labels = self.get_labels(index) + spkembs = get_features_or_waveform( + os.path.join(self.audio_root, self.spk_embeds[index]) + ) + spkembs = torch.from_numpy(spkembs).float() + return {"id": index, "source": wav, "target": fbank, "label_list": labels, 'spkembs': spkembs} + + def __len__(self): + return len(self.sizes) + + def crop_to_max_size(self, wav, target_size): + size = len(wav) + diff = size - target_size + if diff <= 0: + return wav, 0 + + start, end = 0, target_size + if self.random_crop: + start = np.random.randint(0, diff + 1) + end = size - diff + start + return wav[start:end], start + + def collater(self, samples): + # target = max(sizes) -> random_crop not used + # target = max_sample_size -> random_crop used for long + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + audios = [s["source"] for s in samples] + audio_sizes = [len(s) for s in audios] + + fbanks = [s["target"] for s in samples] + fbank_sizes = [len(s) for s in fbanks] + + if self.pad_audio: + audio_size = min(max(audio_sizes), self.max_sample_size) + else: + audio_size = min(min(audio_sizes), self.max_sample_size) + collated_audios, padding_mask, audio_starts = self.collater_audio( + audios, audio_size + ) + + collated_fbanks = [] + collated_audios_size = [] + for i in range(len(fbanks)): + fbank_start = int(audio_starts[i] / (audio_sizes[i] / fbank_sizes[i])) + fbank_size = int(audio_size / (audio_sizes[i] / fbank_sizes[i])) + fbank_end = min(fbank_start + fbank_size, fbank_sizes[i]) + collated_fbanks.append(fbanks[i][fbank_start : fbank_end]) + collated_audios_size.append(audio_size) + collated_fbanks_size = [len(s) for s in collated_fbanks] + collated_fbanks = _collate_frames(collated_fbanks) + collated_fbanks_size = torch.tensor(collated_fbanks_size, dtype=torch.long) + + # thin out frames for reduction factor (B, Lmax, odim) -> (B, Lmax//r, odim) + if self.reduction_factor > 1: + collated_fbanks_in = collated_fbanks[:, self.reduction_factor - 1 :: self.reduction_factor] + collated_fbanks_size_in = collated_fbanks_size.new([torch.div(olen, self.reduction_factor, rounding_mode='floor') for olen in collated_fbanks_size]) + else: + collated_fbanks_in, collated_fbanks_size_in = collated_fbanks, collated_fbanks_size + + prev_output_tokens = torch.cat( + [collated_fbanks_in.new_zeros((collated_fbanks_in.shape[0], 1, collated_fbanks_in.shape[2])), collated_fbanks_in[:, :-1]], dim=1 + ) + + # make labels for stop prediction + labels = collated_fbanks.new_zeros(collated_fbanks.size(0), collated_fbanks.size(1)) + for i, l in enumerate(fbank_sizes): + labels[i, l - 1 :] = 1.0 + + spkembs = _collate_frames([s["spkembs"] for s in samples], is_audio_input=True) + + targets_by_label = [ + [s["label_list"][i] for s in samples] for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, audio_size, audio_starts + ) + + net_input = { + "source": collated_audios, + "padding_mask": padding_mask, + "prev_output_tokens": prev_output_tokens, + "spkembs": spkembs, + "tgt_lengths": collated_fbanks_size_in, + } + + batch = { + "id": 
torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + "labels": labels, + "dec_target": collated_fbanks, + "dec_target_lengths": collated_fbanks_size, + "src_lengths": collated_audios_size, + "task_name": 'speech_pretrain', + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + return batch + + def collater_audio(self, audios, audio_size): + collated_audios = audios[0].new_zeros(len(audios), audio_size) + padding_mask = ( + torch.BoolTensor(collated_audios.shape).fill_(False) + # if self.pad_audio else None + ) + audio_starts = [0 for _ in audios] + for i, audio in enumerate(audios): + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + elif diff < 0: + assert self.pad_audio + collated_audios[i] = torch.cat([audio, audio.new_full((-diff,), 0.0)]) + padding_mask[i, diff:] = True + else: + collated_audios[i], audio_starts[i] = self.crop_to_max_size( + audio, audio_size + ) + return collated_audios, padding_mask, audio_starts + + def collater_frm_label(self, targets, audio_size, audio_starts, label_rate, pad): + assert label_rate > 0 + s2f = label_rate / self.sample_rate + frm_starts = [int(round(s * s2f)) for s in audio_starts] + frm_size = int(round(audio_size * s2f)) + if not self.pad_audio: + rem_size = [len(t) - s for t, s in zip(targets, frm_starts)] + frm_size = min(frm_size, *rem_size) + targets = [t[s : s + frm_size] for t, s in zip(targets, frm_starts)] + logger.debug(f"audio_starts={audio_starts}") + logger.debug(f"frame_starts={frm_starts}") + logger.debug(f"frame_size={frm_size}") + + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_label(self, targets_by_label, audio_size, audio_starts): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + if label_rate == -1.0: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + else: + targets, lengths, ntokens = self.collater_frm_label( + targets, audio_size, audio_starts, label_rate, pad + ) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + if self.pad_audio: + return self.sizes[index] + return min(self.sizes[index], self.max_sample_size) + + def ordered_indices(self): + if self.shuffle: + order = [np.random.permutation(len(self))] + else: + order = [np.arange(len(self))] + + order.append(self.sizes) + return np.lexsort(order)[::-1] + + def postprocess(self, wav, cur_sample_rate): + if wav.dim() == 2: + wav = wav.mean(-1) + assert wav.dim() == 1, wav.dim() + + if cur_sample_rate != self.sample_rate: + raise Exception(f"sr {cur_sample_rate} != {self.sample_rate}") + + if self.normalize: + with torch.no_grad(): + wav = F.layer_norm(wav, wav.shape) + return wav diff --git 
a/SpeechT5/SpeechT5/speecht5/data/speech_to_class_dataset.py b/SpeechT5/SpeechT5/speecht5/data/speech_to_class_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..dda301f1c1e78519d941c07d7fc8ea918858cffd --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/data/speech_to_class_dataset.py @@ -0,0 +1,262 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import logging +import os +from typing import Any, List, Optional + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils, Dictionary +from fairseq.data.fairseq_dataset import FairseqDataset + +logger = logging.getLogger(__name__) + + +def load_audio(manifest_path, max_keep, min_keep): + """manifest tsv: wav_path, wav_nframe, wav_class + + Args + manifest_path: str + max_keep: int + min_keep: int + + Return + root, names, inds, tot, sizes, classes + """ + n_long, n_short = 0, 0 + names, inds, sizes, classes = [], [], [], [] + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + assert len(items) >= 2, line + sz = int(items[1]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + names.append(items[0]) + if len(items) > 2: + classes.append(items[2]) + inds.append(ind) + sizes.append(sz) + tot = ind + 1 + logger.info( + ( + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + if len(classes) == 0: + logger.warn("no classes loaded only if inference") + return root, names, inds, tot, sizes, classes + + +def sample_from_feature(x: np.ndarray, max_segment_length: int = 300): + """Load a segment within 300-400/51200-76800 frames or the corresponding samples from a utterance. + + Args: + x (np.ndarray): feature or waveform (frames[, features]), e.g., log mel filter bank or waveform + max_segment_length (int, optional): maximum segment length. Defaults to 400. 
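+            The length is counted in feature frames for (frames, features) input or in raw samples for 1-D waveform input.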
+ + Returns: + np.ndarray: segmented features + """ + if len(x) <= max_segment_length: + return x + start = np.random.randint(0, x.shape[0] - max_segment_length) + return x[start: start + max_segment_length] + + +class SpeechToClassDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + shuffle: bool = True, + normalize: bool = False, + tgt_dict: Optional[Dictionary] = None, + max_length: Optional[int] = None + ): + self.audio_root, self.audio_names, inds, tot, self.wav_sizes, self.wav_classes = load_audio( + manifest_path, max_keep_sample_size, min_keep_sample_size + ) + self.sample_rate = sample_rate + self.shuffle = shuffle + + self.label_processors = label_processors + + self.normalize = normalize + self.tgt_dict = tgt_dict + self.max_length = max_length + logger.info( + f"max_length={max_length}, normalize={normalize}" + ) + + def get_audio(self, index): + import soundfile as sf + + wav_path = os.path.join(self.audio_root, self.audio_names[index]) + wav, cur_sample_rate = sf.read(wav_path) + if self.max_length is not None: + wav = sample_from_feature(wav, self.max_length) + wav = torch.from_numpy(wav).float() + wav = self.postprocess(wav, cur_sample_rate) + return wav + + def get_label(self, index): + label = self.wav_classes[index] + + if self.label_processors is not None: + label = self.label_processors(label) + return label + + def __getitem__(self, index): + wav = self.get_audio(index) + label = None + if len(self.wav_classes) == len(self.audio_names): + label = self.get_label(index) + return {"id": index, "source": wav, "label": label} + + def __len__(self): + return len(self.wav_sizes) + + def collater(self, samples): + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + audios = [s["source"] for s in samples] + audio_sizes = [len(s) for s in audios] + + audio_size = max(audio_sizes) + collated_audios, padding_mask = self.collater_audio( + audios, audio_size + ) + + decoder_label = None + decoder_target = None + decoder_target_lengths = None + if samples[0]["label"] is not None: + targets_by_label = [ + [s["label"] for s in samples] + ] + targets_list, lengths_list, ntokens_list = self.collater_label(targets_by_label) + + decoder_label = [ + (targets_list[0][i, :lengths_list[0][i]]).long() + for i in range(targets_list[0].size(0)) + ] + + decoder_target = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos(), + left_pad=False, + move_eos_to_beginning=False, + ) + decoder_target_lengths = torch.tensor( + [x.size(0) for x in decoder_label], dtype=torch.long + ) + prev_output_tokens = data_utils.collate_tokens( + [torch.LongTensor([-1]) for _ in samples], + self.tgt_dict.pad(), + self.tgt_dict.eos(), + left_pad=False, + move_eos_to_beginning=True, + ) + + net_input = { + "source": collated_audios, + "padding_mask": padding_mask, + "prev_output_tokens": prev_output_tokens, + "task_name": "s2c", + } + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + "target": decoder_target, + "target_lengths": decoder_target_lengths, + "task_name": "s2c", + "ntokens": len(samples), + } + + return batch + + def collater_audio(self, audios, audio_size): + collated_audios = audios[0].new_zeros(len(audios), audio_size) + padding_mask = ( + torch.BoolTensor(collated_audios.shape).fill_(False) + ) + for i, 
audio in enumerate(audios): + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + elif diff < 0: + collated_audios[i] = torch.cat([audio, audio.new_full((-diff,), 0.0)]) + padding_mask[i, diff:] = True + else: + raise Exception("Diff should not be larger than 0") + return collated_audios, padding_mask + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_label(self, targets_by_label): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, [self.tgt_dict.pad()]) + for targets, pad in itr: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + return self.wav_sizes[index] + + @property + def sizes(self): + return np.array(self.wav_sizes) + + def ordered_indices(self): + if self.shuffle: + order = [np.random.permutation(len(self))] + else: + order = [np.arange(len(self))] + + order.append(self.wav_sizes) + return np.lexsort(order)[::-1] + + def postprocess(self, wav, cur_sample_rate): + if wav.dim() == 2: + wav = wav.mean(-1) + assert wav.dim() == 1, wav.dim() + + if cur_sample_rate != self.sample_rate: + raise Exception(f"sr {cur_sample_rate} != {self.sample_rate}") + + if self.normalize: + with torch.no_grad(): + wav = F.layer_norm(wav, wav.shape) + return wav diff --git a/SpeechT5/SpeechT5/speecht5/data/speech_to_speech_dataset.py b/SpeechT5/SpeechT5/speecht5/data/speech_to_speech_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c9c195d74dacbaaeb9c94aea69b351a75e3bc91a --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/data/speech_to_speech_dataset.py @@ -0,0 +1,282 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import logging +import os +from typing import Any, List, Optional + +import librosa +import numpy as np +import torch +import torch.nn.functional as F +from fairseq.data.fairseq_dataset import FairseqDataset + +logger = logging.getLogger(__name__) + +def _collate_frames( + frames: List[torch.Tensor], is_audio_input: bool = False +): + """ + Convert a list of 2D frames into a padded 3D tensor + Args: + frames (list): list of 2D frames of size L[i]*f_dim. 
Where L[i] is + length of i-th frame and f_dim is static dimension of features + Returns: + 3D tensor of size len(frames)*len_max*f_dim where len_max is max of L[i] + """ + max_len = max(frame.size(0) for frame in frames) + if is_audio_input: + out = frames[0].new_zeros((len(frames), max_len)) + else: + out = frames[0].new_zeros((len(frames), max_len, frames[0].size(1))) + for i, v in enumerate(frames): + out[i, : v.size(0)] = v + return out + +def load_audio(manifest_path, max_keep, min_keep): + """manifest tsv: src_wav, src_nframe, tgt_wav, tgt_nframe, tgt_spkemb""" + n_long, n_short = 0, 0 + src_names, tgt_names, inds, sizes, tgt_sizes, spk_embeds = [], [], [], [], [], [] + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + assert len(items) >= 2, line + sz = int(items[1]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + src_names.append(items[0]) + tgt_names.append(items[2]) + tgt_sizes.append(items[3]) + spk_embeds.append(items[4]) + inds.append(ind) + sizes.append(sz) + tot = ind + 1 + logger.info( + ( + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(src_names)}, skipped {n_short} short and {n_long} long, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, src_names, inds, tot, sizes, tgt_names, tgt_sizes, spk_embeds + + +def logmelfilterbank( + audio, + sampling_rate, + fft_size=1024, + hop_size=256, + win_length=None, + window="hann", + num_mels=80, + fmin=80, + fmax=7600, + eps=1e-10, +): + """Compute log-Mel filterbank feature. + (https://github.com/kan-bayashi/ParallelWaveGAN/blob/master/parallel_wavegan/bin/preprocess.py) + + Args: + audio (ndarray): Audio signal (T,). + sampling_rate (int): Sampling rate. + fft_size (int): FFT size. + hop_size (int): Hop size. + win_length (int): Window length. If set to None, it will be the same as fft_size. + window (str): Window function type. + num_mels (int): Number of mel basis. + fmin (int): Minimum frequency in mel basis calculation. + fmax (int): Maximum frequency in mel basis calculation. + eps (float): Epsilon value to avoid inf in log calculation. + + Returns: + ndarray: Log Mel filterbank feature (#frames, num_mels). 
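+        (For example, one second of 16 kHz audio with the default hop_size of 256 yields roughly 63 frames of num_mels-dimensional features.)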
+ + """ + # get amplitude spectrogram + x_stft = librosa.stft(audio, n_fft=fft_size, hop_length=hop_size, + win_length=win_length, window=window, pad_mode="reflect") + spc = np.abs(x_stft).T # (#frames, #bins) + + # get mel basis + fmin = 0 if fmin is None else fmin + fmax = sampling_rate / 2 if fmax is None else fmax + mel_basis = librosa.filters.mel(sr=sampling_rate, n_fft=fft_size, n_mels=num_mels, fmin=fmin, fmax=fmax) + + return np.log10(np.maximum(eps, np.dot(spc, mel_basis.T))) + + +class SpeechToSpeechDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + shuffle: bool = True, + normalize: bool = False, + reduction_factor: int = 1, + ): + self.audio_root, self.audio_names, inds, tot, self.wav_sizes, self.tgt_audios, self.tgt_sizes, self.tgt_spkembs = load_audio( + manifest_path, max_keep_sample_size, min_keep_sample_size + ) + self.sample_rate = sample_rate + self.shuffle = shuffle + + self.normalize = normalize + self.reduction_factor = reduction_factor + logger.info( + f"reduction_factor={reduction_factor}, normalize={normalize}" + ) + + def get_audio(self, index): + import soundfile as sf + + wav_fbank = [] + for name in [self.audio_names[index], self.tgt_audios[index]]: + wav_path = os.path.join(self.audio_root, name) + wav, cur_sample_rate = sf.read(wav_path) + wav = torch.from_numpy(wav).float() + fbank = logmelfilterbank( + wav.view(-1).cpu().numpy(), 16000 + ) + fbank = torch.from_numpy(fbank).float() + wav = self.postprocess(wav, cur_sample_rate) + wav_fbank.append(wav) + wav_fbank.append(fbank) + src_wav, src_fbank, tgt_wav, tgt_fbank = wav_fbank + return src_wav, src_fbank, tgt_wav, tgt_fbank + + def __getitem__(self, index): + src_wav, src_fbank, tgt_wav, tgt_fbank = self.get_audio(index) + spkembs = np.load(os.path.join(self.audio_root, self.tgt_spkembs[index])) + spkembs = torch.from_numpy(spkembs).float() + name = self.audio_names[index].replace("/", ".").replace(".wav", "") + "-" + self.tgt_audios[index].replace("/", ".").replace(".wav", "") + ".wav" + return {"id": index, "source": src_wav, "target": tgt_fbank, "spkembs": spkembs, "audio_name": name, "tgt_name": self.tgt_audios[index]} + + def __len__(self): + return len(self.wav_sizes) + + def collater(self, samples): + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + audios = [s["source"] for s in samples] + audio_sizes = [len(s) for s in audios] + + audio_size = max(audio_sizes) + collated_audios, padding_mask = self.collater_audio( + audios, audio_size + ) + + fbanks = [s["target"] for s in samples] + fbank_sizes = [len(s) for s in fbanks] + + collated_fbanks = _collate_frames(fbanks) + collated_fbanks_size = torch.tensor(fbank_sizes, dtype=torch.long) + + # thin out frames for reduction factor (B, Lmax, odim) -> (B, Lmax//r, odim) + if self.reduction_factor > 1: + collated_fbanks_in = collated_fbanks[:, self.reduction_factor - 1 :: self.reduction_factor] + collated_fbanks_size_in = collated_fbanks_size.new([torch.div(olen, self.reduction_factor, rounding_mode='floor') for olen in collated_fbanks_size]) + else: + collated_fbanks_in, collated_fbanks_size_in = collated_fbanks, collated_fbanks_size + + prev_output_tokens = torch.cat( + [collated_fbanks_in.new_zeros((collated_fbanks_in.shape[0], 1, collated_fbanks_in.shape[2])), collated_fbanks_in[:, :-1]], dim=1 + ) + + # make labels for stop prediction + labels = 
collated_fbanks.new_zeros(collated_fbanks.size(0), collated_fbanks.size(1)) + for i, l in enumerate(fbank_sizes): + labels[i, l - 1 :] = 1.0 + + spkembs = _collate_frames([s["spkembs"] for s in samples], is_audio_input=True) + + net_input = { + "source": collated_audios, + "padding_mask": padding_mask, + "prev_output_tokens": prev_output_tokens, + "tgt_lengths": collated_fbanks_size_in, + "spkembs": spkembs, + "task_name": "s2s", + } + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "name": [s["audio_name"] for s in samples], + "tgt_name": [s["tgt_name"] for s in samples], + "net_input": net_input, + "labels": labels, + "dec_target": collated_fbanks, + "dec_target_lengths": collated_fbanks_size, + "src_lengths": torch.LongTensor(audio_sizes), + "task_name": "s2s", + "ntokens": sum(audio_sizes), + "target": collated_fbanks, + } + + return batch + + def collater_audio(self, audios, audio_size): + collated_audios = audios[0].new_zeros(len(audios), audio_size) + padding_mask = ( + torch.BoolTensor(collated_audios.shape).fill_(False) + ) + for i, audio in enumerate(audios): + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + elif diff < 0: + collated_audios[i] = torch.cat([audio, audio.new_full((-diff,), 0.0)]) + padding_mask[i, diff:] = True + else: + raise Exception("Diff should not be larger than 0") + return collated_audios, padding_mask + + + def num_tokens(self, index): + return self.wav_sizes[index] + + def size(self, index): + return self.wav_sizes[index], self.tgt_sizes[index] + + @property + def sizes(self): + return np.array(self.wav_sizes) + + @property + def can_reuse_epoch_itr_across_epochs(self): + """No cache dataset if dataset is large-scale. Cache dataset for small dataset.""" + return True + + def ordered_indices(self): + if self.shuffle: + order = [np.random.permutation(len(self))] + else: + order = [np.arange(len(self))] + + order.append(self.wav_sizes) + return np.lexsort(order)[::-1] + + def postprocess(self, wav, cur_sample_rate): + if wav.dim() == 2: + wav = wav.mean(-1) + assert wav.dim() == 1, wav.dim() + + if cur_sample_rate != self.sample_rate: + raise Exception(f"sr {cur_sample_rate} != {self.sample_rate}") + + if self.normalize: + with torch.no_grad(): + wav = F.layer_norm(wav, wav.shape) + return wav diff --git a/SpeechT5/SpeechT5/speecht5/data/speech_to_text_dataset.py b/SpeechT5/SpeechT5/speecht5/data/speech_to_text_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..e0be66663d1e8d268e98f7e56abc4c8af2cc4232 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/data/speech_to_text_dataset.py @@ -0,0 +1,270 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import itertools +import logging +import os +from typing import Any, List, Optional + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils, Dictionary +from fairseq.data.fairseq_dataset import FairseqDataset + +logger = logging.getLogger(__name__) + + +def load_audio(manifest_path, max_keep, min_keep): + n_long, n_short = 
0, 0 + names, inds, sizes = [], [], [] + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + assert len(items) >= 2, line + sz = int(items[1]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + names.append(items[0]) + inds.append(ind) + sizes.append(sz) + tot = ind + 1 + logger.info( + ( + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, names, inds, tot, sizes + + +def load_label(label_path, inds, tot): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds, tot): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + assert ( + len(code_lengths) == tot + ), f"number of labels does not match ({len(code_lengths)} != {tot})" + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + +class SpeechToTextDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + shuffle: bool = True, + normalize: bool = False, + store_labels: bool = True, + tgt_dict: Optional[Dictionary] = None, + tokenizer = None, + ): + self.audio_root, self.audio_names, inds, tot, self.wav_sizes = load_audio( + manifest_path, max_keep_sample_size, min_keep_sample_size + ) + self.sample_rate = sample_rate + self.shuffle = shuffle + self.tgt_dict = tgt_dict + self.tokenizer = tokenizer + + self.num_labels = len(label_paths) + self.label_processors = label_processors + self.store_labels = store_labels + if store_labels: + self.label_list = [load_label(p, inds, tot) for p in label_paths] + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds, tot) for p in label_paths + ] + assert label_processors is None or len(label_processors) == self.num_labels + + self.normalize = normalize + logger.info( + f"normalize={normalize}" + ) + + def get_audio(self, index): + import soundfile as sf + + wav_path = os.path.join(self.audio_root, self.audio_names[index]) + wav, cur_sample_rate = sf.read(wav_path) + wav = torch.from_numpy(wav).float() + wav = self.postprocess(wav, cur_sample_rate) + return wav + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.tokenizer is not None: + label = self.tokenizer.encode(label) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + def __getitem__(self, index): + wav = self.get_audio(index) + labels = self.get_labels(index) + return {"id": index, "source": wav, "label_list": labels} + + def __len__(self): + return len(self.wav_sizes) + + def 
collater(self, samples): + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + audios = [s["source"] for s in samples] + audio_sizes = [len(s) for s in audios] + + audio_size = max(audio_sizes) + collated_audios, padding_mask = self.collater_audio( + audios, audio_size + ) + + targets_by_label = [ + [s["label_list"][i] for s in samples] for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label(targets_by_label) + + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + + decoder_target = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos(), + left_pad=False, + move_eos_to_beginning=False, + ) + decoder_target_lengths = torch.tensor( + [x.size(0) for x in decoder_label], dtype=torch.long + ) + prev_output_tokens = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos(), + left_pad=False, + move_eos_to_beginning=True, + ) + + net_input = { + "source": collated_audios, + "padding_mask": padding_mask, + "prev_output_tokens": prev_output_tokens, + "task_name": "s2t", + } + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + "target": decoder_target, + "target_lengths": decoder_target_lengths, + "task_name": "s2t", + "ntokens": ntokens_list[0] + } + + return batch + + def collater_audio(self, audios, audio_size): + collated_audios = audios[0].new_zeros(len(audios), audio_size) + padding_mask = ( + torch.BoolTensor(collated_audios.shape).fill_(False) + ) + for i, audio in enumerate(audios): + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + elif diff < 0: + collated_audios[i] = torch.cat([audio, audio.new_full((-diff,), 0.0)]) + padding_mask[i, diff:] = True + else: + raise Exception("Diff should not be larger than 0") + return collated_audios, padding_mask + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_label(self, targets_by_label): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, [self.tgt_dict.pad()]) + for targets, pad in itr: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + return self.wav_sizes[index] + + @property + def sizes(self): + return np.array(self.wav_sizes) + + def ordered_indices(self): + if self.shuffle: + order = [np.random.permutation(len(self))] + else: + order = [np.arange(len(self))] + + order.append(self.wav_sizes) + return np.lexsort(order)[::-1] + + def postprocess(self, wav, cur_sample_rate): + if wav.dim() == 2: + wav = wav.mean(-1) + assert wav.dim() == 1, wav.dim() + + if cur_sample_rate != self.sample_rate: + raise Exception(f"sr {cur_sample_rate} != {self.sample_rate}") + + if self.normalize: + with torch.no_grad(): + wav = F.layer_norm(wav, wav.shape) + return wav diff --git a/SpeechT5/SpeechT5/speecht5/data/text_dataset.py b/SpeechT5/SpeechT5/speecht5/data/text_dataset.py new file mode 100644 index 
0000000000000000000000000000000000000000..faa0120d25807c3d7420ecd5848ca7429fc79dd5 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/data/text_dataset.py @@ -0,0 +1,476 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import math + +import numpy as np +import torch + +from fairseq.data import FairseqDataset, data_utils + + +def collate( + samples, + pad_idx, + eos_idx, + vocab, + left_pad_source=False, + left_pad_target=False, + input_feeding=True, + pad_to_length=None, +): + assert input_feeding + if len(samples) == 0: + return {} + + def merge(key, left_pad, move_eos_to_beginning=False, pad_to_length=None): + return data_utils.collate_tokens( + [s[key] for s in samples], + pad_idx, + eos_idx=None, # use eos_idx of each sample instead of vocab.eos() + left_pad=left_pad, + move_eos_to_beginning=move_eos_to_beginning, + pad_to_length=pad_to_length, + ) + + id = torch.LongTensor([s["id"] for s in samples]) + src_tokens = merge( + "source", + left_pad=left_pad_source, + pad_to_length=pad_to_length["source"] if pad_to_length is not None else None, + ) + # sort by descending source length + src_lengths = torch.LongTensor([s["source"].numel() for s in samples]) + src_lengths, sort_order = src_lengths.sort(descending=True) + id = id.index_select(0, sort_order) + src_tokens = src_tokens.index_select(0, sort_order) + + prev_output_tokens = None + target = None + if samples[0].get("target", None) is not None: + target = merge( + "target", + left_pad=left_pad_target, + pad_to_length=pad_to_length["target"] + if pad_to_length is not None + else None, + ) + target = target.index_select(0, sort_order) + ntokens = sum(len(s["target"]) for s in samples) + + if input_feeding: + # we create a shifted version of targets for feeding the + # previous output token(s) into the next decoder step + prev_output_tokens = merge( + "target", + left_pad=left_pad_target, + move_eos_to_beginning=True, + pad_to_length=pad_to_length["target"] + if pad_to_length is not None + else None, + ) + prev_output_tokens = prev_output_tokens.index_select(0, sort_order) + else: + ntokens = sum(len(s["source"]) for s in samples) + + batch = { + "id": id, + "ntokens": ntokens, + "net_input": { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + }, + "target": target, + "nsentences": samples[0]["source"].size(0), + "sort_order": sort_order, + "task_name": 'text_pretrain', + } + if prev_output_tokens is not None: + batch["net_input"]["prev_output_tokens"] = prev_output_tokens + + return batch + + +class TextPretrainDataset(FairseqDataset): + """ + A wrapper around TokenBlockDataset for BART dataset. + + Args: + dataset (TokenBlockDataset): dataset to wrap + sizes (List[int]): sentence lengths + vocab (~fairseq.data.Dictionary): vocabulary + mask_idx (int): dictionary index used for masked token + mask_whole_words: only mask whole words. This should be a byte mask + over vocab indices, indicating whether it is the beginning of a + word. We will extend any mask to encompass the whole word. + shuffle (bool, optional): shuffle the elements before batching. 
+ Default: ``True`` + seed: Seed for random number generator for reproducibility. + args: argparse arguments. + """ + + def __init__( + self, + dataset, + sizes, + vocab, + mask_idx, + mask_whole_words, + shuffle, + seed, + args, + eos=None, + item_transform_func=None, + iid_noise_target=False, + uni_mask_idxs=None, + ): + self.dataset = dataset + + self.sizes = sizes + + self.vocab = vocab + self.shuffle = shuffle + self.seed = seed + if iid_noise_target: + assert isinstance(uni_mask_idxs, torch.Tensor), "if use iid_noise_target, the uni_mask_idxs must be a tensor which contain the mask indexs" + self.iid_noise_target = iid_noise_target + self.uni_mask_idxs = uni_mask_idxs + self.mask_idx = mask_idx + self.mask_whole_word = mask_whole_words + self.mask_ratio = args.mask + self.random_ratio = args.mask_random + self.insert_ratio = args.insert + self.rotate_ratio = args.rotate + self.permute_sentence_ratio = args.permute_sentences + self.eos = eos if eos is not None else vocab.eos() + self.item_transform_func = item_transform_func + + if args.bpe != "gpt2": + self.full_stop_index = self.vocab.eos() + else: + assert args.bpe == "gpt2" + self.full_stop_index = self.vocab.index("13") + + self.replace_length = args.replace_length + if self.replace_length not in [-1, 0, 1]: + raise ValueError(f"invalid arg: replace_length={self.replace_length}") + if args.mask_length not in ["subword", "word", "span-poisson"]: + raise ValueError(f"invalid arg: mask-length={args.mask_length}") + if args.mask_length == "subword" and args.replace_length not in [0, 1]: + raise ValueError(f"if using subwords, use replace-length=1 or 0") + + self.mask_span_distribution = None + if args.mask_length == "span-poisson": + _lambda = args.poisson_lambda + + lambda_to_the_k = 1 + e_to_the_minus_lambda = math.exp(-_lambda) + k_factorial = 1 + ps = [] + for k in range(0, 128): + ps.append(e_to_the_minus_lambda * lambda_to_the_k / k_factorial) + lambda_to_the_k *= _lambda + k_factorial *= k + 1 + if ps[-1] < 0.0000001: + break + ps = torch.FloatTensor(ps) + self.mask_span_distribution = torch.distributions.Categorical(ps) + + self.epoch = 0 + + @property + def can_reuse_epoch_itr_across_epochs(self): + return True # only the noise changes, not item sizes + + def set_epoch(self, epoch, **unused): + self.epoch = epoch + + def __getitem__(self, index): + with data_utils.numpy_seed(self.seed, self.epoch, index): + tokens = self.dataset[index] + assert tokens[-1] == self.eos + source, target = tokens, tokens.clone() + + if self.permute_sentence_ratio > 0.0: + source = self.permute_sentences(source, self.permute_sentence_ratio) + + if self.mask_ratio > 0: + source, new_target = self.add_whole_word_mask(source, self.mask_ratio) + if new_target is not None: + target = new_target + + if self.insert_ratio > 0: + source = self.add_insertion_noise(source, self.insert_ratio) + + if self.rotate_ratio > 0.0 and np.random.random() < self.rotate_ratio: + source = self.add_rolling_noise(source) + # there can additional changes to make: + if self.item_transform_func is not None: + source, target = self.item_transform_func(source, target) + + assert (source >= 0).all() + assert (source[1:-1] >= 1).all() + assert (source <= len(self.vocab)).all() + assert source[0] == self.vocab.bos() + assert source[-1] == self.eos + return { + "id": index, + "source": source, + "target": target, + } + + def __len__(self): + return len(self.dataset) + + def permute_sentences(self, source, p=1.0): + full_stops = source == self.full_stop_index + # Pretend it ends 
with a full stop so last span is a sentence + full_stops[-2] = 1 + + # Tokens that are full stops, where the previous token is not + sentence_ends = (full_stops[1:] * ~full_stops[:-1]).nonzero(as_tuple=False) + 2 + result = source.clone() + + num_sentences = sentence_ends.size(0) + num_to_permute = math.ceil((num_sentences * 2 * p) / 2.0) + substitutions = torch.randperm(num_sentences)[:num_to_permute] + ordering = torch.arange(0, num_sentences) + ordering[substitutions] = substitutions[torch.randperm(num_to_permute)] + + # Ignore at start + index = 1 + for i in ordering: + sentence = source[(sentence_ends[i - 1] if i > 0 else 1) : sentence_ends[i]] + result[index : index + sentence.size(0)] = sentence + index += sentence.size(0) + return result + + def word_starts(self, source): + if self.mask_whole_word is not None: + is_word_start = self.mask_whole_word.gather(0, source) + else: + is_word_start = torch.ones(source.size()) + is_word_start[0] = 0 + is_word_start[-1] = 0 + return is_word_start + + def add_whole_word_mask(self, source, p): + source_ori = source.clone() + is_word_start = self.word_starts(source) + num_to_mask = int(math.ceil(is_word_start.float().sum() * p)) + num_inserts = 0 + if num_to_mask == 0: + return source + + if self.mask_span_distribution is not None: + lengths = self.mask_span_distribution.sample(sample_shape=(num_to_mask,)) + + # Make sure we have enough to mask + cum_length = torch.cumsum(lengths, 0) + while cum_length[-1] < num_to_mask: + lengths = torch.cat( + [ + lengths, + self.mask_span_distribution.sample(sample_shape=(num_to_mask,)), + ], + dim=0, + ) + cum_length = torch.cumsum(lengths, 0) + + # Trim to masking budget + i = 0 + while cum_length[i] < num_to_mask: + i += 1 + lengths[i] = num_to_mask - (0 if i == 0 else cum_length[i - 1]) + num_to_mask = i + 1 + lengths = lengths[:num_to_mask] + + # Handle 0-length mask (inserts) separately + lengths = lengths[lengths > 0] + num_inserts = num_to_mask - lengths.size(0) + num_to_mask -= num_inserts + if num_to_mask == 0: + return self.add_insertion_noise(source, num_inserts / source.size(0)) + + assert (lengths > 0).all() + else: + lengths = torch.ones((num_to_mask,)).long() + assert is_word_start[-1] == 0 + word_starts = is_word_start.nonzero(as_tuple=False) + indices = word_starts[ + torch.randperm(word_starts.size(0))[:num_to_mask] + ].squeeze(1) + mask_random = torch.FloatTensor(num_to_mask).uniform_() < self.random_ratio + + source_length = source.size(0) + assert source_length - 1 not in indices + to_keep = torch.ones(source_length, dtype=torch.bool) + is_word_start[ + -1 + ] = 255 # acts as a long length, so spans don't go over the end of doc + if self.replace_length == 0: + to_keep[indices] = 0 + else: + # keep index, but replace it with [MASK] + source[indices] = self.mask_idx + source[indices[mask_random]] = torch.randint( + 1, len(self.vocab), size=(mask_random.sum(),) + ) + + if self.mask_span_distribution is not None: + assert len(lengths.size()) == 1 + assert lengths.size() == indices.size() + lengths -= 1 + while indices.size(0) > 0: + assert lengths.size() == indices.size() + lengths -= is_word_start[indices + 1].long() + uncompleted = lengths >= 0 + indices = indices[uncompleted] + 1 + mask_random = mask_random[uncompleted] + lengths = lengths[uncompleted] + if self.replace_length != -1: + # delete token + to_keep[indices] = 0 + else: + # keep index, but replace it with [MASK] + source[indices] = self.mask_idx + source[indices[mask_random]] = torch.randint( + 1, len(self.vocab), 
size=(mask_random.sum(),) + ) + else: + # A bit faster when all lengths are 1 + while indices.size(0) > 0: + uncompleted = is_word_start[indices + 1] == 0 + indices = indices[uncompleted] + 1 + mask_random = mask_random[uncompleted] + if self.replace_length != -1: + # delete token + to_keep[indices] = 0 + else: + # keep index, but replace it with [MASK] + source[indices] = self.mask_idx + source[indices[mask_random]] = torch.randint( + 1, len(self.vocab), size=(mask_random.sum(),) + ) + + assert source_length - 1 not in indices + + if not self.iid_noise_target: + source = source[to_keep] + target = None + else: + ## Prepare source + source_mask_idx = (source == self.mask_idx).nonzero().view(-1) + source[source_mask_idx] = self.uni_mask_idxs[:source_mask_idx.size(0)] + source = source[to_keep] + + ## Prepare target + to_keep[source_mask_idx] = 0 + + # source_mask_idx: from [a, b, c, ...] to [a, b + 1, c + 2, ...] + source_mask_idx = source_mask_idx + torch.arange(source_mask_idx.size(0)) + # target: source_length + mask_length + target = source_ori.new_zeros(source_mask_idx.size(0) + source_ori.size(0)) + # target: [0, 0, 0, X, 0, 0, Y, ....] + target[source_mask_idx] = self.uni_mask_idxs[:source_mask_idx.size(0)] + + target_to_keep = to_keep.new_zeros(source_mask_idx.size(0) + source_ori.size(0)) + + # Copy original value to target and target_to_keep + target_to_keep[target == 0] = to_keep + target_to_keep[-1] = 0 + target[target == 0] = source_ori + + target = target[~target_to_keep] + + if num_inserts > 0: + source = self.add_insertion_noise(source, num_inserts / source.size(0)) + + return source, target + + def add_permuted_noise(self, tokens, p): + num_words = len(tokens) + num_to_permute = math.ceil(((num_words * 2) * p) / 2.0) + substitutions = torch.randperm(num_words - 2)[:num_to_permute] + 1 + tokens[substitutions] = tokens[substitutions[torch.randperm(num_to_permute)]] + return tokens + + def add_rolling_noise(self, tokens): + offset = np.random.randint(1, max(1, tokens.size(-1) - 1) + 1) + tokens = torch.cat( + (tokens[0:1], tokens[offset:-1], tokens[1:offset], tokens[-1:]), + dim=0, + ) + return tokens + + def add_insertion_noise(self, tokens, p): + if p == 0.0: + return tokens + + num_tokens = len(tokens) + n = int(math.ceil(num_tokens * p)) + + noise_indices = torch.randperm(num_tokens + n - 2)[:n] + 1 + noise_mask = torch.zeros(size=(num_tokens + n,), dtype=torch.bool) + noise_mask[noise_indices] = 1 + result = torch.LongTensor(n + len(tokens)).fill_(-1) + + num_random = int(math.ceil(n * self.random_ratio)) + result[noise_indices[num_random:]] = self.mask_idx + result[noise_indices[:num_random]] = torch.randint( + low=1, high=len(self.vocab), size=(num_random,) + ) + + result[~noise_mask] = tokens + + assert (result >= 0).all() + return result + + def collater(self, samples, pad_to_length=None): + """Merge a list of samples to form a mini-batch. + Args: + samples (List[dict]): samples to collate + Returns: + dict: a mini-batch of data + """ + return collate( + samples, self.vocab.pad(), self.eos, self.vocab, pad_to_length=pad_to_length + ) + + def num_tokens(self, index): + """Return the number of tokens in a sample. This value is used to + enforce ``--max-tokens`` during batching.""" + return self.sizes[index] + + def size(self, index): + """Return an example's size as a float or tuple. This value is used when + filtering a dataset with ``--max-positions``.""" + return self.sizes[index] + + def ordered_indices(self): + """Return an ordered list of indices. 
Batches will be constructed based + on this order.""" + if self.shuffle: + indices = np.random.permutation(len(self)) + else: + indices = np.arange(len(self)) + return indices[np.argsort(self.sizes[indices], kind="mergesort")] + + def prefetch(self, indices): + self.src.prefetch(indices) + self.tgt.prefetch(indices) + + @property + def supports_prefetch(self): + return ( + hasattr(self.src, "supports_prefetch") + and self.src.supports_prefetch + and hasattr(self.tgt, "supports_prefetch") + and self.tgt.supports_prefetch + ) diff --git a/SpeechT5/SpeechT5/speecht5/data/text_to_speech_dataset.py b/SpeechT5/SpeechT5/speecht5/data/text_to_speech_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..e0e0d750d142fed26f555a88062e25fabf0f0153 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/data/text_to_speech_dataset.py @@ -0,0 +1,331 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import itertools +import logging +import os +from typing import Any, List, Optional + +import numpy as np + +import torch +import torch.nn.functional as F +import librosa +from fairseq.data.audio.speech_to_text_dataset import get_features_or_waveform +from fairseq.data import data_utils, Dictionary +from fairseq.data.fairseq_dataset import FairseqDataset + + +logger = logging.getLogger(__name__) + +def _collate_frames( + frames: List[torch.Tensor], is_audio_input: bool = False +): + """ + Convert a list of 2D frames into a padded 3D tensor + Args: + frames (list): list of 2D frames of size L[i]*f_dim. 
Where L[i] is + length of i-th frame and f_dim is static dimension of features + Returns: + 3D tensor of size len(frames)*len_max*f_dim where len_max is max of L[i] + """ + max_len = max(frame.size(0) for frame in frames) + if is_audio_input: + out = frames[0].new_zeros((len(frames), max_len)) + else: + out = frames[0].new_zeros((len(frames), max_len, frames[0].size(1))) + for i, v in enumerate(frames): + out[i, : v.size(0)] = v + return out + +def load_audio(manifest_path, max_keep, min_keep): + n_long, n_short = 0, 0 + names, inds, sizes, spk_embeds = [], [], [], [] + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + assert len(items) == 3, line + sz = int(items[1]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + names.append(items[0]) + spk_embeds.append(items[2]) + inds.append(ind) + sizes.append(sz) + tot = ind + 1 + logger.info( + ( + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, names, inds, tot, sizes, spk_embeds + + +def load_label(label_path, inds, tot): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds, tot): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + assert ( + len(code_lengths) == tot + ), f"number of labels does not match ({len(code_lengths)} != {tot})" + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + +def logmelfilterbank( + audio, + sampling_rate, + fft_size=1024, + hop_size=256, + win_length=None, + window="hann", + num_mels=80, + fmin=80, + fmax=7600, + eps=1e-10, +): + """Compute log-Mel filterbank feature. + (https://github.com/kan-bayashi/ParallelWaveGAN/blob/master/parallel_wavegan/bin/preprocess.py) + + Args: + audio (ndarray): Audio signal (T,). + sampling_rate (int): Sampling rate. + fft_size (int): FFT size. + hop_size (int): Hop size. + win_length (int): Window length. If set to None, it will be the same as fft_size. + window (str): Window function type. + num_mels (int): Number of mel basis. + fmin (int): Minimum frequency in mel basis calculation. + fmax (int): Maximum frequency in mel basis calculation. + eps (float): Epsilon value to avoid inf in log calculation. + + Returns: + ndarray: Log Mel filterbank feature (#frames, num_mels). 
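+    Example:
+        A minimal illustrative sketch (random 1-second mono signal at 16 kHz,
+        default fft_size/hop_size above); the exact frame count follows from
+        1 + len(audio) // hop_size:
+
+        >>> import numpy as np
+        >>> audio = np.random.randn(16000).astype(np.float32)
+        >>> feats = logmelfilterbank(audio, sampling_rate=16000)
+        >>> feats.shape  # (#frames, num_mels)
+        (63, 80)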
+ + """ + # get amplitude spectrogram + x_stft = librosa.stft(audio, n_fft=fft_size, hop_length=hop_size, + win_length=win_length, window=window, pad_mode="reflect") + spc = np.abs(x_stft).T # (#frames, #bins) + + # get mel basis + fmin = 0 if fmin is None else fmin + fmax = sampling_rate / 2 if fmax is None else fmax + mel_basis = librosa.filters.mel(sr=sampling_rate, n_fft=fft_size, n_mels=num_mels, fmin=fmin, fmax=fmax) + + return np.log10(np.maximum(eps, np.dot(spc, mel_basis.T))) + + + +class TextToSpeechDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + shuffle: bool = True, + normalize: bool = False, + store_labels: bool = True, + src_dict: Optional[Dictionary] = None, + tokenizer = None, + reduction_factor: int = 1, + ): + self.audio_root, self.audio_names, inds, tot, self.wav_sizes, self.spk_embeds = load_audio( + manifest_path, max_keep_sample_size, min_keep_sample_size + ) + self.sample_rate = sample_rate + self.shuffle = shuffle + self.src_dict = src_dict + self.tokenizer = tokenizer + + self.num_labels = len(label_paths) + self.label_processors = label_processors + self.store_labels = store_labels + if store_labels: + self.label_list = [load_label(p, inds, tot) for p in label_paths] + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds, tot) for p in label_paths + ] + assert label_processors is None or len(label_processors) == self.num_labels + + self.normalize = normalize + self.reduction_factor = reduction_factor + logger.info( + f"reduction_factor={reduction_factor}, normalize={normalize}" + ) + + def get_audio(self, index): + import soundfile as sf + + wav_path = os.path.join(self.audio_root, self.audio_names[index]) + wav, cur_sample_rate = sf.read(wav_path) + wav = torch.from_numpy(wav).float() + fbank = logmelfilterbank( + wav.view(-1).cpu().numpy(), 16000 + ) + fbank = torch.from_numpy(fbank).float() + wav = self.postprocess(wav, cur_sample_rate) + return wav, fbank + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.tokenizer is not None: + label = self.tokenizer.encode(label) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + def __getitem__(self, index): + wav, fbank = self.get_audio(index) + labels = self.get_labels(index) + spkembs = get_features_or_waveform( + os.path.join(self.audio_root, self.spk_embeds[index]) + ) + spkembs = torch.from_numpy(spkembs).float() + return {"id": index, "source": labels, "target": fbank, "spkembs": spkembs, "audio_name": self.audio_names[index]} + + def __len__(self): + return len(self.wav_sizes) + + def collater(self, samples): + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + fbanks = [s["target"] for s in samples] + fbank_sizes = [len(s) for s in fbanks] + + collated_fbanks = _collate_frames(fbanks) + collated_fbanks_size = torch.tensor(fbank_sizes, dtype=torch.long) + + # thin out frames for reduction factor (B, Lmax, odim) -> 
(B, Lmax//r, odim) + if self.reduction_factor > 1: + collated_fbanks_in = collated_fbanks[:, self.reduction_factor - 1 :: self.reduction_factor] + collated_fbanks_size_in = collated_fbanks_size.new([torch.div(olen, self.reduction_factor, rounding_mode='floor') for olen in collated_fbanks_size]) + else: + collated_fbanks_in, collated_fbanks_size_in = collated_fbanks, collated_fbanks_size + + prev_output_tokens = torch.cat( + [collated_fbanks_in.new_zeros((collated_fbanks_in.shape[0], 1, collated_fbanks_in.shape[2])), collated_fbanks_in[:, :-1]], dim=1 + ) + + # make labels for stop prediction + labels = collated_fbanks.new_zeros(collated_fbanks.size(0), collated_fbanks.size(1)) + for i, l in enumerate(fbank_sizes): + labels[i, l - 1 :] = 1.0 + + spkembs = _collate_frames([s["spkembs"] for s in samples], is_audio_input=True) + + sources_by_label = [ + [s["source"][i] for s in samples] for i in range(self.num_labels) + ] + sources_list, lengths_list, ntokens_list = self.collater_label(sources_by_label) + + net_input = { + "src_tokens": sources_list[0], + "src_lengths": lengths_list[0], + "prev_output_tokens": prev_output_tokens, + "tgt_lengths": collated_fbanks_size_in, + "spkembs": spkembs, + "task_name": "t2s", + } + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "name": [s["audio_name"] for s in samples], + "net_input": net_input, + "labels": labels, + "dec_target": collated_fbanks, + "dec_target_lengths": collated_fbanks_size, + "src_lengths": lengths_list[0], + "task_name": "t2s", + "ntokens": ntokens_list[0], + "target": collated_fbanks, + } + + return batch + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_label(self, targets_by_label): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, [self.src_dict.pad()]) + for targets, pad in itr: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + return self.wav_sizes[index] + + @property + def sizes(self): + return np.array(self.wav_sizes) + + def ordered_indices(self): + if self.shuffle: + order = [np.random.permutation(len(self))] + else: + order = [np.arange(len(self))] + + order.append(self.wav_sizes) + return np.lexsort(order)[::-1] + + def postprocess(self, wav, cur_sample_rate): + if wav.dim() == 2: + wav = wav.mean(-1) + assert wav.dim() == 1, wav.dim() + + if cur_sample_rate != self.sample_rate: + raise Exception(f"sr {cur_sample_rate} != {self.sample_rate}") + + if self.normalize: + with torch.no_grad(): + wav = F.layer_norm(wav, wav.shape) + return wav diff --git a/SpeechT5/SpeechT5/speecht5/models/__init__.py b/SpeechT5/SpeechT5/speecht5/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d8db7a74c15397db0aaf82a1459146cbe12a9c8b --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/__init__.py @@ -0,0 +1,2 @@ +from .speecht5 import * # noqa +from .t5_transformer_lm import * # noqa diff --git a/SpeechT5/SpeechT5/speecht5/models/__pycache__/__init__.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..f3790a5f4602527577e96b4f6409c5ec093a5dec Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/__pycache__/speecht5.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/__pycache__/speecht5.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e2896cf9451ca2aecf373f7e98f0d3e3de85df03 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/__pycache__/speecht5.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/__pycache__/t5_transformer_lm.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/__pycache__/t5_transformer_lm.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e151cc0b994042bda49225c8ccc5ed9d9c0bdb3b Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/__pycache__/t5_transformer_lm.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__init__.py b/SpeechT5/SpeechT5/speecht5/models/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/__init__.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe0e384d115e0195f488b5cb75c858ea81dddcde Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/__init__.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/decoder.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/decoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d80c9d76e64f75470c8af41a8899378ad99a7fd Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/decoder.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/encoder.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/encoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b35d6fefcde56df42af0e0be1286bc03102645af Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/encoder.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/multihead_attention.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/multihead_attention.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d6ceabf6fed2a4e2906db0839e50af1cbd5bccb Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/multihead_attention.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speaker_decoder_postnet.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speaker_decoder_postnet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c60bd4dc28a5d2360f3ed3eb1a12706a9bd2f5df Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speaker_decoder_postnet.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_decoder_postnet.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_decoder_postnet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fca1593e32969d9380adb2ac06c727d8fd86ec5 Binary 
files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_decoder_postnet.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_decoder_prenet.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_decoder_prenet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0cd7f46ab1171beea8c0b19576d425f27742c21 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_decoder_prenet.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_encoder_postnet.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_encoder_postnet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..85152411f95643196586788f42f6bee76a7801dd Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_encoder_postnet.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_encoder_prenet.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_encoder_prenet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4af6bde0ce17c9bb8d6a672fc133bbb94538564c Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/speech_encoder_prenet.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/text_decoder_postnet.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/text_decoder_postnet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4bd9424b168389b4b0b9bfda76e512286ccc5b66 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/text_decoder_postnet.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/text_decoder_prenet.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/text_decoder_prenet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3031c3dd8943a7f9ae830e027d7781c7b2701f9f Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/text_decoder_prenet.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/text_encoder_prenet.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/text_encoder_prenet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad3c6ba53cfce3290abfd5f247c71cbb957dfdd7 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/text_encoder_prenet.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/transformer_layer.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/transformer_layer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..204c69fd9750efbf4ddf09e6098a70bbeec5ba9b Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/models/modules/__pycache__/transformer_layer.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/decoder.py b/SpeechT5/SpeechT5/speecht5/models/modules/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..a066d1dd38af547c62e86b0aaa4efcf7f4e47040 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/decoder.py @@ -0,0 +1,324 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal 
Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import ( + FairseqIncrementalDecoder, +) +from fairseq.modules import ( + FairseqDropout, + LayerDropModuleList, + LayerNorm, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from torch import Tensor + +from .encoder import RelativePositionalEncoding +from .transformer_layer import TransformerDecoderLayer + +DEFAULT_MIN_PARAMS_TO_WRAP = int(1e8) + + +class TransformerDecoder(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, + args, + no_encoder_attn=False, + ): + self.args = args + super().__init__(None) + self.register_buffer("version", torch.Tensor([3])) + self._future_mask = torch.empty(0) + + self.dropout_module = FairseqDropout( + args.dropout, module_name=self.__class__.__name__ + ) + self.decoder_layerdrop = args.decoder_layerdrop + # self.max_s_positions = args.max_target_positions + export = getattr(args, "export", False) + self.cross_self_attention = getattr(args, "cross_self_attention", False) + + if self.decoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.layers.extend( + [ + self.build_decoder_layer(args, no_encoder_attn) + for _ in range(args.decoder_layers) + ] + ) + self.num_layers = len(self.layers) + + if args.decoder_normalize_before and not getattr( + args, "no_decoder_final_norm", False + ): + self.layer_norm = LayerNorm(args.decoder_embed_dim, eps=args.layer_norm_eps, export=export) + else: + self.layer_norm = None + + if args.relative_position_embedding: + self.pos_emb = RelativePositionalEncoding(args.encoder_embed_dim//args.encoder_attention_heads, args.decoder_max_relative_position) + + def build_decoder_layer(self, args, no_encoder_attn=False): + layer = TransformerDecoderLayer(args, no_encoder_attn=no_encoder_attn, has_relative_attention_bias=args.relative_position_embedding) + checkpoint = getattr(args, "checkpoint_activations", False) + if checkpoint: + offload_to_cpu = getattr(args, "offload_activations", False) + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = ( + getattr(args, "min_params_to_wrap", DEFAULT_MIN_PARAMS_TO_WRAP) + if not checkpoint + else 0 + ) + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward( + self, + prev_output_tokens, + tgt_mask, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, 
Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention, should be of size T x B x C + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + x, extra = self.extract_features( + prev_output_tokens, + tgt_mask, + encoder_out=encoder_out, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + ) + + return x, extra + + def extract_features( + self, + prev_output_tokens, + tgt_mask, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + return self.extract_features_scriptable( + prev_output_tokens, + tgt_mask, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + + """ + A scriptable subclass of this class has an extract_features method and calls + super().extract_features, but super() is not supported in torchscript. A copy of + this function is made to be used in the subclass instead. + """ + + def extract_features_scriptable( + self, + prev_output_tokens, + tgt_mask, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + """ + Similar to *forward* but only return features. + + Includes several features from "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + alignment_layer (int, optional): return mean alignment over + heads at this layer (default: last layer). + alignment_heads (int, optional): only average alignment over + this many heads (default: all heads). 
+ + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + bs = prev_output_tokens.size(0) + if alignment_layer is None: + alignment_layer = self.num_layers - 1 + + enc: Optional[Tensor] = None + padding_mask: Optional[Tensor] = None + if encoder_out is not None and len(encoder_out["encoder_out"]) > 0: + enc = encoder_out["encoder_out"][0] + assert ( + enc.size()[1] == bs + ), f"Expected enc.shape == (t, {bs}, c) got {enc.shape}" + if encoder_out is not None and len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + + # B x T x C -> T x B x C + x = prev_output_tokens.transpose(0, 1) + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or tgt_mask is not None: + self_attn_padding_mask = tgt_mask + + ## relative position embedding + if self.args.relative_position_embedding: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + # decoder layers + attn_list = [] + attn: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, _ = layer( + x, + enc, + padding_mask, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer or alignment_layer == -1)), + need_head_weights=bool((idx == alignment_layer or alignment_layer == -1)), + pos_bias=pos_k, + ) + inner_states.append(x) + if layer_attn is not None and (idx == alignment_layer or alignment_layer == -1): + attn = layer_attn.float().to(x) + attn_list.append(attn.transpose(0, 1)) + + if attn is not None and len(attn_list) == 1: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + return x, {"attn": [attn if len(attn_list) <= 1 else attn_list], "inner_states": inner_states} + + # def max_positions(self): + # """Maximum output length supported by the decoder.""" + # return self.max_target_positions + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround. 
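+        # Rebuild the cached mask when it is empty, lives on a different
+        # device, or is smaller than the current sequence length; otherwise
+        # the cached buffer is reused (and only moved to the right device).
+        # For dim == 3 the resulting mask is:
+        #   [[0., -inf, -inf],
+        #    [0.,   0., -inf],
+        #    [0.,   0.,   0.]]
+        # so position t can only attend to positions <= t.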
+ if ( + self._future_mask.size(0) == 0 + or (not self._future_mask.device == tensor.device) + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(torch.zeros([dim, dim], device=tensor.device)), 1, + ) + else: + self._future_mask = self._future_mask.to(tensor) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + for i in range(self.num_layers): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + def set_num_updates(self, num_updates): + """State from trainer to pass along to model at every update.""" + + def _apply(m): + if hasattr(m, "set_num_updates") and m != self: + m.set_num_updates(num_updates) + + self.apply(_apply) diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/encoder.py b/SpeechT5/SpeechT5/speecht5/models/modules/encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..0deb193285497d40da3286c48016c5c12fa6710f --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/encoder.py @@ -0,0 +1,381 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +from typing import Dict, List + +import numpy as np +import torch +import torch.nn as nn +import contextlib +from fairseq import utils +from fairseq.models import ( + FairseqEncoder, +) +from fairseq.modules import ( + FairseqDropout, + LayerNorm, + TransformerEncoderLayer, +) +from torch import Tensor +from .transformer_layer import TransformerSentenceEncoderLayer + + + +DEFAULT_MIN_PARAMS_TO_WRAP = int(1e8) + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m + + +class RelativePositionalEncoding(torch.nn.Module): + def __init__(self, d_model, maxlen=1000, embed_v=False): + super(RelativePositionalEncoding, self).__init__() + + self.d_model = d_model + self.maxlen = maxlen + self.pe_k = torch.nn.Embedding(2*maxlen, d_model) + if embed_v: + self.pe_v = torch.nn.Embedding(2*maxlen, d_model) + self.embed_v = embed_v + + + def forward(self, pos_seq): + pos_seq[pos_seq < -self.maxlen] = -self.maxlen + pos_seq[pos_seq >= self.maxlen] = self.maxlen - 1 + pos_seq = pos_seq + self.maxlen + if self.embed_v: + return self.pe_k(pos_seq), self.pe_v(pos_seq) + else: + return self.pe_k(pos_seq), None + +class 
TransformerEncoder(FairseqEncoder): + """ + Transformer encoder consisting of *args.encoder_layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, args, tgt_dict=None, embed_tokens=None): + self.args = args + super().__init__(None) + self.register_buffer("version", torch.Tensor([3])) + + self.dropout_module = FairseqDropout( + args.dropout, module_name=self.__class__.__name__ + ) + self.encoder_layerdrop = args.encoder_layerdrop + self.freeze_encoder_updates = args.freeze_encoder_updates + if args.no_freeze_encoder_layer is not None: + self.no_freeze_encoder_layer = eval(args.no_freeze_encoder_layer) + else: + self.no_freeze_encoder_layer = None + self.num_updates = 0 + export = getattr(args, "export", False) + + self.layers = nn.ModuleList([]) + self.layers.extend( + [self.build_encoder_layer(args) for i in range(args.encoder_layers)] + ) + self.num_layers = len(self.layers) + + self.use_sent_enc_layer = args.use_sent_enc_layer + self.unb_enc_layer = getattr(args, "unb_enc_layer", -1) + + self.layer_norm_first = args.layer_norm_first + self.layer_norm = LayerNorm(args.encoder_embed_dim, eps=args.layer_norm_eps, export=export) + + if args.share_ctc_embed and embed_tokens is not None: + self.proj = nn.Linear( + embed_tokens.weight.shape[1], + embed_tokens.weight.shape[0], + bias=False, + ) + self.proj.weight = embed_tokens.weight + elif tgt_dict is not None: + self.proj = Linear(args.encoder_embed_dim, len(tgt_dict)) + else: + self.proj = None + + if args.relative_position_embedding: + self.pos_emb = RelativePositionalEncoding(args.encoder_embed_dim//args.encoder_attention_heads, args.encoder_max_relative_position) + + + def build_encoder_layer(self, args): + if args.use_sent_enc_layer: + layer = TransformerSentenceEncoderLayer( + embedding_dim=args.encoder_embed_dim, + ffn_embedding_dim=args.encoder_ffn_embed_dim, + num_attention_heads=args.encoder_attention_heads, + dropout=args.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + layer_norm_first=args.layer_norm_first, + has_relative_attention_bias=args.relative_position_embedding, + ) + else: + layer = TransformerEncoderLayer(args) + return layer + + def forward( + self, + encoder_in, + encoder_padding_mask, + return_all_hiddens: bool = False, + tgt_layer=None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. 
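+                - **encoder_out_for_ctc** (Tensor): the final encoder states
+                  passed through dropout and the CTC projection ``self.proj``;
+                  ``None`` when no CTC projection is configured. This entry is
+                  added by *forward* on top of *forward_scriptable*.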
+ """ + if self.no_freeze_encoder_layer is None: + ft = self.freeze_encoder_updates <= self.num_updates + else: + ft = True + with torch.no_grad() if not ft else contextlib.ExitStack(): + encoder_out = self.forward_scriptable( + encoder_in, encoder_padding_mask, return_all_hiddens, tgt_layer=tgt_layer, + ) + + # CTC and bert + if self.proj: + x_for_ctc = self.proj(self.dropout_module(encoder_out["encoder_out"][0])) + else: + x_for_ctc = None + + encoder_out["encoder_out_for_ctc"] = [x_for_ctc] # T x B x C + + return encoder_out + + # TorchScript doesn't support super() method so that the scriptable Subclass + # can't access the base class model in Torchscript. + # Current workaround is to add a helper function with different name and + # call the helper function from scriptable Subclass. + def forward_scriptable( + self, + encoder_in, + encoder_padding_mask, + return_all_hiddens: bool = False, + tgt_layer=None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. 
+ """ + if self.no_freeze_encoder_layer is not None: + ft = self.freeze_encoder_updates <= self.num_updates + else: + ft = True + with torch.no_grad() if not ft else contextlib.ExitStack(): + # compute padding mask + if not self.use_sent_enc_layer: + has_pads = encoder_in.device.type == "xla" or encoder_padding_mask.any() + + if not self.layer_norm_first: + encoder_in = self.layer_norm(encoder_in) + + encoder_in = self.dropout_module(encoder_in) + + # B x T x C -> T x B x C + x = encoder_in.transpose(0, 1) + + encoder_states = [] + + if return_all_hiddens: + encoder_states.append(x) + + ## relative position embedding + if self.args.relative_position_embedding: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + # encoder layers + r = None + d = None + for i, layer in enumerate(self.layers): + dropout_probability = np.random.random() + + with torch.no_grad() if (not ft) and i not in self.no_freeze_encoder_layer else contextlib.ExitStack(): + if not self.training or (dropout_probability > self.encoder_layerdrop) or i == self.unb_enc_layer: + if self.use_sent_enc_layer: + x, _ = layer(x, self_attn_padding_mask=encoder_padding_mask, self_attn_mask=None, need_weights=False, pos_bias=pos_k) + # x, _ = layer(x, self_attn_padding_mask=encoder_padding_mask, need_weights=False, pos_bias=pos_k) + else: + x = layer(x, encoder_padding_mask=encoder_padding_mask if has_pads else None, attn_mask=None) + # x = layer(x, encoder_padding_mask=encoder_padding_mask if has_pads else None) + if i == self.unb_enc_layer: + d = x + + if i == tgt_layer: + r = x + break + + if return_all_hiddens: + assert encoder_states is not None + encoder_states.append(x) + + with torch.no_grad() if not ft else contextlib.ExitStack(): + # Finally T x B x C + if self.layer_norm_first: + x = self.layer_norm(x.transpose(0, 1)).transpose(0, 1) + + if r is not None: + x = r + + # The Pytorch Mobile lite interpreter does not supports returning NamedTuple in + # `forward` so we use a dictionary instead. + # TorchScript does not support mixed values so the values are all lists. + # The empty list is equivalent to None. + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [encoder_padding_mask], # B x T + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": [], + "decoder_input": [d], + } + + @torch.jit.export + def reorder_encoder_out(self, encoder_out: Dict[str, List[Tensor]], new_order): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if len(encoder_out["encoder_out"]) == 0: + new_encoder_out = [] + else: + new_encoder_out = [encoder_out["encoder_out"][0].index_select(1, new_order)] + + if len(encoder_out["encoder_out_for_ctc"]) == 0: + new_x_for_ctc = [] + else: + new_x_for_ctc = [encoder_out["encoder_out_for_ctc"][0].index_select(1, new_order)] + + if len(encoder_out["encoder_padding_mask"]) == 0: + new_encoder_padding_mask = [] + else: + new_encoder_padding_mask = [ + encoder_out["encoder_padding_mask"][0].index_select(0, new_order) + ] + + if len(encoder_out["src_tokens"]) == 0: + src_tokens = [] + else: + src_tokens = [(encoder_out["src_tokens"][0]).index_select(0, new_order)] + + if len(encoder_out["decoder_input"]) == 0 or encoder_out["decoder_input"][0] is None: + new_decoder_input = [] + else: + new_decoder_input = [ + encoder_out["decoder_input"][0].index_select(0, new_order) + ] + + encoder_states = encoder_out["encoder_states"] + if len(encoder_states) > 0: + for idx, state in enumerate(encoder_states): + encoder_states[idx] = state.index_select(1, new_order) + + return { + "encoder_out": new_encoder_out, # T x B x C + "encoder_padding_mask": new_encoder_padding_mask, # B x T + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": src_tokens, # B x T + "encoder_out_for_ctc": new_x_for_ctc, # T x B x C + "decoder_input": new_decoder_input, + } + + # def max_positions(self): + # """Maximum input length supported by the encoder.""" + # return self.max_source_positions + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + # if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + # weights_key = "{}.embed_positions.weights".format(name) + # if weights_key in state_dict: + # print("deleting {0}".format(weights_key)) + # del state_dict[weights_key] + # state_dict[ + # "{}.embed_positions._float_tensor".format(name) + # ] = torch.FloatTensor(1) + for i in range(self.num_layers): + # update layer norms + if not isinstance(self.layers[i], TransformerSentenceEncoderLayer): + self.layers[i].upgrade_state_dict_named( + state_dict, "{}.layers.{}".format(name, i) + ) + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + return state_dict + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + \ No newline at end of file diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/multihead_attention.py b/SpeechT5/SpeechT5/speecht5/models/modules/multihead_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..fb126ef6b72d61b9cc50bceca2504976c307f865 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/multihead_attention.py @@ -0,0 +1,522 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# 
Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import math +from typing import Dict, Optional, Tuple + +import torch +import torch.nn.functional as F +from fairseq import utils +from fairseq.incremental_decoding_utils import with_incremental_state +from fairseq.modules.fairseq_dropout import FairseqDropout +from fairseq.modules.quant_noise import quant_noise +from torch import Tensor, nn +from torch.nn import Parameter + + +@with_incremental_state +class MultiheadAttention(nn.Module): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. + """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + q_noise=0.0, + qn_block_size=8, + has_relative_attention_bias=False, + ): + super().__init__() + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim + + self.num_heads = num_heads + self.dropout_module = FairseqDropout( + dropout, module_name=self.__class__.__name__ + ) + + self.has_relative_attention_bias = has_relative_attention_bias + self.head_dim = embed_dim // num_heads + assert ( + self.head_dim * num_heads == self.embed_dim + ), "embed_dim must be divisible by num_heads" + self.scaling = self.head_dim ** -0.5 + + self.self_attention = self_attention + self.encoder_decoder_attention = encoder_decoder_attention + + assert not self.self_attention or self.qkv_same_dim, ( + "Self-attention requires query, key and " "value to be of the same size" + ) + + self.k_proj = quant_noise( + nn.Linear(self.kdim, embed_dim, bias=bias), q_noise, qn_block_size + ) + self.v_proj = quant_noise( + nn.Linear(self.vdim, embed_dim, bias=bias), q_noise, qn_block_size + ) + self.q_proj = quant_noise( + nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size + ) + + self.out_proj = quant_noise( + nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size + ) + + if add_bias_kv: + self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim)) + self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + self.reset_parameters() + + self.onnx_trace = False + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def reset_parameters(self): + if self.qkv_same_dim: + # Empirically observed the convergence to be much better with + # the scaled initialization + nn.init.xavier_uniform_(self.k_proj.weight, gain=1 / math.sqrt(2)) + nn.init.xavier_uniform_(self.v_proj.weight, gain=1 / math.sqrt(2)) + nn.init.xavier_uniform_(self.q_proj.weight, gain=1 / math.sqrt(2)) + else: + nn.init.xavier_uniform_(self.k_proj.weight) + nn.init.xavier_uniform_(self.v_proj.weight) + nn.init.xavier_uniform_(self.q_proj.weight) + + nn.init.xavier_uniform_(self.out_proj.weight) + if self.out_proj.bias is not None: + nn.init.constant_(self.out_proj.bias, 0.0) + if self.bias_k is not None: + nn.init.xavier_normal_(self.bias_k) + if self.bias_v is not None: + nn.init.xavier_normal_(self.bias_v) + + def forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, 
Optional[Tensor]]]] = None, + need_weights: bool = True, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + before_softmax: bool = False, + need_head_weights: bool = False, + position_bias: Optional[Tensor] = None + ) -> Tuple[Tensor, Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. + need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). + before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. + """ + if need_head_weights: + need_weights = True + + is_tpu = query.device.type == "xla" + + tgt_len, bsz, embed_dim = query.size() + src_len = tgt_len + assert embed_dim == self.embed_dim + assert list(query.size()) == [tgt_len, bsz, embed_dim] + if key is not None: + src_len, key_bsz, _ = key.size() + if not torch.jit.is_scripting(): + assert key_bsz == bsz + assert value is not None + assert src_len, bsz == value.shape[:2] + + if ( + not self.onnx_trace + and not is_tpu # don't use PyTorch version on TPUs + and incremental_state is None + and not static_kv + # A workaround for quantization to work. Otherwise JIT compilation + # treats bias in linear module as method. + and not torch.jit.is_scripting() + and not self.has_relative_attention_bias + ): + assert key is not None and value is not None + return F.multi_head_attention_forward( + query, + key, + value, + self.embed_dim, + self.num_heads, + torch.empty([0]), + torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), + self.bias_k, + self.bias_v, + self.add_zero_attn, + self.dropout_module.p, + self.out_proj.weight, + self.out_proj.bias, + self.training or self.dropout_module.apply_during_inference, + key_padding_mask, + need_weights, + attn_mask, + use_separate_proj_weight=True, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + ) + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if 
key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + key_padding_mask.new_zeros(key_padding_mask.size(0), 1), + ], + dim=1, + ) + + q = ( + q.contiguous() + .view(tgt_len, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if k is not None: + k = ( + k.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if v is not None: + v = ( + v.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + assert k is not None + k = torch.cat([prev_key, k], dim=1) + src_len = k.size(1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + + saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + assert k.size(1) == src_len + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. 
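+        # A zero-dimensional key_padding_mask is just a placeholder for
+        # "no mask" (see the workaround note above); drop it here so the
+        # checks below only ever see a real (bsz, src_len) mask or None.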
+ if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + assert v is not None + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + torch.zeros(key_padding_mask.size(0), 1).type_as( + key_padding_mask + ), + ], + dim=1, + ) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + if position_bias is not None and self.has_relative_attention_bias: ## first order + ## position_bias: [241, 241, 64] + #print ("attn_weights: ", attn_weights.size()) # [492, 241, 241] + reshape_q = q.contiguous().view(bsz * self.num_heads, -1, self.head_dim).transpose(0,1) #[241, 492, 64] + #print ("reshape_q: ", reshape_q.size()) + B = torch.matmul(reshape_q, position_bias.transpose(-2, -1)) + #print ("B: ", B.size()) ## [241, 492, 241] + #B = B.transpose(0, 1).view(bsz, self.num_heads, position_bias.size(0), position_bias.size(1)) + B = B.transpose(0, 1).view(bsz*self.num_heads, position_bias.size(0), position_bias.size(1)) + #print ("B 2: ", B.size()) + attn_weights += B + else: + position_bias = None + + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if not is_tpu: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if before_softmax: + return attn_weights, v + + attn_weights_float = utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace + ) + attn_weights = attn_weights_float.type_as(attn_weights) + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + if self.onnx_trace and attn.size(1) == 1: + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, embed_dim) + else: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn = self.out_proj(attn) + attn_weights: Optional[Tensor] = None + if need_weights: + attn_weights = attn_weights_float.view( + bsz, self.num_heads, tgt_len, src_len + ).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + + return attn, attn_weights + + @staticmethod + def _append_prev_key_padding_mask( + key_padding_mask: Optional[Tensor], + prev_key_padding_mask: Optional[Tensor], + batch_size: 
int, + src_len: int, + static_kv: bool, + ) -> Optional[Tensor]: + # saved key padding masks have shape (bsz, seq_len) + if prev_key_padding_mask is not None and static_kv: + new_key_padding_mask = prev_key_padding_mask + elif prev_key_padding_mask is not None and key_padding_mask is not None: + new_key_padding_mask = torch.cat( + [prev_key_padding_mask.float(), key_padding_mask.float()], dim=1 + ) + # During incremental decoding, as the padding token enters and + # leaves the frame, there will be a time when prev or current + # is None + elif prev_key_padding_mask is not None: + if src_len > prev_key_padding_mask.size(1): + filler = torch.zeros( + (batch_size, src_len - prev_key_padding_mask.size(1)), + device=prev_key_padding_mask.device, + ) + new_key_padding_mask = torch.cat( + [prev_key_padding_mask.float(), filler.float()], dim=1 + ) + else: + new_key_padding_mask = prev_key_padding_mask.float() + elif key_padding_mask is not None: + if src_len > key_padding_mask.size(1): + filler = torch.zeros( + (batch_size, src_len - key_padding_mask.size(1)), + device=key_padding_mask.device, + ) + new_key_padding_mask = torch.cat( + [filler.float(), key_padding_mask.float()], dim=1 + ) + else: + new_key_padding_mask = key_padding_mask.float() + else: + new_key_padding_mask = prev_key_padding_mask + return new_key_padding_mask + + @torch.jit.export + def reorder_incremental_state( + self, + incremental_state: Dict[str, Dict[str, Optional[Tensor]]], + new_order: Tensor, + ): + """Reorder buffered internal state (for incremental generation).""" + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + for k in input_buffer.keys(): + input_buffer_k = input_buffer[k] + if input_buffer_k is not None: + if self.encoder_decoder_attention and input_buffer_k.size( + 0 + ) == new_order.size(0): + break + input_buffer[k] = input_buffer_k.index_select(0, new_order) + incremental_state = self._set_input_buffer(incremental_state, input_buffer) + return incremental_state + + def _get_input_buffer( + self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] + ) -> Dict[str, Optional[Tensor]]: + result = self.get_incremental_state(incremental_state, "attn_state") + if result is not None: + return result + else: + empty_result: Dict[str, Optional[Tensor]] = {} + return empty_result + + def _set_input_buffer( + self, + incremental_state: Dict[str, Dict[str, Optional[Tensor]]], + buffer: Dict[str, Optional[Tensor]], + ): + return self.set_incremental_state(incremental_state, "attn_state", buffer) + + def apply_sparse_mask(self, attn_weights, tgt_len: int, src_len: int, bsz: int): + return attn_weights + + def upgrade_state_dict_named(self, state_dict, name): + prefix = name + "." 
if name != "" else "" + items_to_add = {} + keys_to_remove = [] + for k in state_dict.keys(): + if k.endswith(prefix + "in_proj_weight"): + # in_proj_weight used to be q + k + v with same dimensions + dim = int(state_dict[k].shape[0] / 3) + items_to_add[prefix + "q_proj.weight"] = state_dict[k][:dim] + items_to_add[prefix + "k_proj.weight"] = state_dict[k][dim : 2 * dim] + items_to_add[prefix + "v_proj.weight"] = state_dict[k][2 * dim :] + + keys_to_remove.append(k) + + k_bias = prefix + "in_proj_bias" + if k_bias in state_dict.keys(): + dim = int(state_dict[k].shape[0] / 3) + items_to_add[prefix + "q_proj.bias"] = state_dict[k_bias][:dim] + items_to_add[prefix + "k_proj.bias"] = state_dict[k_bias][ + dim : 2 * dim + ] + items_to_add[prefix + "v_proj.bias"] = state_dict[k_bias][2 * dim :] + + keys_to_remove.append(prefix + "in_proj_bias") + + for k in keys_to_remove: + del state_dict[k] + + for key, value in items_to_add.items(): + state_dict[key] = value diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/speaker_decoder_postnet.py b/SpeechT5/SpeechT5/speecht5/models/modules/speaker_decoder_postnet.py new file mode 100644 index 0000000000000000000000000000000000000000..555ddef0475305f5be581a58d155ff358269e051 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/speaker_decoder_postnet.py @@ -0,0 +1,197 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import torch.nn as nn +import math +import torch +import torch.nn.functional as F + + +class AngularMargin(nn.Module): + """ + An implementation of Angular Margin (AM) proposed in the following + paper: '''Margin Matters: Towards More Discriminative Deep Neural Network + Embeddings for Speaker Recognition''' (https://arxiv.org/abs/1906.07317) + + Arguments + --------- + margin : float + The margin for cosine similiarity + scale : float + The scale for cosine similiarity + + Return + --------- + predictions : torch.Tensor + + Example + ------- + >>> pred = AngularMargin() + >>> outputs = torch.tensor([ [1., -1.], [-1., 1.], [0.9, 0.1], [0.1, 0.9] ]) + >>> targets = torch.tensor([ [1., 0.], [0., 1.], [ 1., 0.], [0., 1.] ]) + >>> predictions = pred(outputs, targets) + >>> predictions[:,0] > predictions[:,1] + tensor([ True, False, True, False]) + """ + + def __init__(self, margin=0.0, scale=1.0): + super(AngularMargin, self).__init__() + self.margin = margin + self.scale = scale + + def forward(self, outputs, targets): + """Compute AM between two tensors + + Arguments + --------- + outputs : torch.Tensor + The outputs of shape [N, C], cosine similarity is required. + targets : torch.Tensor + The targets of shape [N, C], where the margin is applied for. 
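+
+        The margin is subtracted only at the target positions, so the layer
+        returns scale * (outputs - margin * targets).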
+ + Return + --------- + predictions : torch.Tensor + """ + outputs = outputs - self.margin * targets + return self.scale * outputs + + +class AdditiveAngularMargin(AngularMargin): + """ + An implementation of Additive Angular Margin (AAM) proposed + in the following paper: '''Margin Matters: Towards More Discriminative Deep + Neural Network Embeddings for Speaker Recognition''' + (https://arxiv.org/abs/1906.07317) + + Arguments + --------- + margin : float + The margin for cosine similiarity, usually 0.2. + scale: float + The scale for cosine similiarity, usually 30. + + Returns + ------- + predictions : torch.Tensor + Tensor. + Example + ------- + >>> outputs = torch.tensor([ [1., -1.], [-1., 1.], [0.9, 0.1], [0.1, 0.9] ]) + >>> targets = torch.tensor([ [1., 0.], [0., 1.], [ 1., 0.], [0., 1.] ]) + >>> pred = AdditiveAngularMargin() + >>> predictions = pred(outputs, targets) + >>> predictions[:,0] > predictions[:,1] + tensor([ True, False, True, False]) + """ + + def __init__(self, margin=0.0, scale=1.0, easy_margin=False): + super(AdditiveAngularMargin, self).__init__(margin, scale) + self.easy_margin = easy_margin + + self.cos_m = math.cos(self.margin) + self.sin_m = math.sin(self.margin) + self.th = math.cos(math.pi - self.margin) + self.mm = math.sin(math.pi - self.margin) * self.margin + + def forward(self, outputs, targets): + """ + Compute AAM between two tensors + + Arguments + --------- + outputs : torch.Tensor + The outputs of shape [N, C], cosine similarity is required. + targets : torch.Tensor + The targets of shape [N, C], where the margin is applied for. + + Return + --------- + predictions : torch.Tensor + """ + cosine = outputs.float() + sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1)) + phi = cosine * self.cos_m - sine * self.sin_m # cos(theta + m) + if self.easy_margin: + phi = torch.where(cosine > 0, phi, cosine) + else: + phi = torch.where(cosine > self.th, phi, cosine - self.mm) + outputs = (targets * phi) + ((1.0 - targets) * cosine) + return self.scale * outputs + + +class SpeakerDecoderPostnet(nn.Module): + """Speaker Identification Postnet. + + Arguments + --------- + embed_dim : int + The size of embedding. + class_num: int + The number of classes. 
+ args : Namespace + + Return + --------- + embed : torch.Tensor + output : torch.Tensor + """ + + def __init__(self, embed_dim, class_num, args): + super(SpeakerDecoderPostnet, self).__init__() + self.embed_dim = embed_dim + self.class_num = class_num + self.no_pooling_bn = getattr(args, "sid_no_pooling_bn", False) + self.no_embed_postnet = getattr(args, "sid_no_embed_postnet", False) + self.normalize_postnet = getattr(args, "sid_normalize_postnet", False) + self.softmax_head = getattr(args, "sid_softmax_type", "softmax") + if not self.no_pooling_bn: + self.bn_pooling = nn.BatchNorm1d(args.decoder_output_dim) + else: + self.bn_pooling = None + if not self.no_embed_postnet: + self.output_embedding = nn.Linear(args.decoder_output_dim, embed_dim, bias=False) + self.bn_embedding = nn.BatchNorm1d(embed_dim) + else: + self.output_embedding = None + self.bn_embedding = None + self.embed_dim = args.decoder_output_dim + self.output_projection = nn.Linear(self.embed_dim, class_num, bias=False) + if self.softmax_head == "amsoftmax": + self.output_layer = AngularMargin(args.softmax_margin, args.softmax_scale) + elif self.softmax_head == "aamsoftmax": + self.output_layer = AdditiveAngularMargin(args.softmax_margin, args.softmax_scale, args.softmax_easy_margin) + else: + self.output_layer = None + if self.output_embedding is not None: + nn.init.normal_(self.output_embedding.weight, mean=0, std=embed_dim ** -0.5) + nn.init.normal_(self.output_projection.weight, mean=0, std=class_num ** -0.5) + + def forward(self, x, target=None): + """ + Parameters + ---------- + x : torch.Tensor of shape [batch, channel] or [batch, time, channel] + target : torch.Tensor of shape [batch, channel] + """ + if self.bn_pooling is not None: + x = self.bn_pooling(x) + if self.output_embedding is not None and self.bn_embedding is not None: + embed = self.bn_embedding(self.output_embedding(x)) + else: + embed = x + if self.output_layer is not None or self.normalize_postnet: + x_norm = F.normalize(embed, p=2, dim=1) + w_norm = F.normalize(self.output_projection.weight, p=2, dim=1) # [out_dim, in_dim] + output = F.linear(x_norm, w_norm) + if self.training and target is not None and self.output_layer is not None: + output = self.output_layer(output, target) + else: + output = self.output_projection(embed) + return output, embed diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/speech_decoder_postnet.py b/SpeechT5/SpeechT5/speecht5/models/modules/speech_decoder_postnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6e357be150f72f0b9bb27855a4417eb743763134 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/speech_decoder_postnet.py @@ -0,0 +1,76 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import contextlib +import torch +import torch.nn as nn + +from espnet.nets.pytorch_backend.tacotron2.decoder import Postnet + + +class SpeechDecoderPostnet(nn.Module): + """ + + Args: + in_channels (int): the number of input channels + mid_channels (int): the number of intermediate channels + out_channels (int): the number of output 
channels + kernel_sizes (List[int]): the kernel size for each convolutional layer + """ + + def __init__( + self, + odim, + args, + ): + super(SpeechDecoderPostnet, self).__init__() + # define decoder postnet + # define final projection + self.feat_out = torch.nn.Linear(args.decoder_embed_dim, odim * args.reduction_factor) + self.prob_out = torch.nn.Linear(args.decoder_embed_dim, args.reduction_factor) + + # define postnet + self.postnet = ( + None + if args.postnet_layers == 0 + else Postnet( + idim=0, + odim=odim, + n_layers=args.postnet_layers, + n_chans=args.postnet_chans, + n_filts=args.postnet_filts, + use_batch_norm=args.use_batch_norm, + dropout_rate=args.postnet_dropout_rate, + ) + ) + + self.odim = odim + self.num_updates = 0 + self.freeze_decoder_updates = args.freeze_decoder_updates + + def forward(self, zs): + ft = self.freeze_decoder_updates <= self.num_updates + with torch.no_grad() if not ft else contextlib.ExitStack(): + # (B, Lmax//r, odim * r) -> (B, Lmax//r * r, odim) + before_outs = self.feat_out(zs).view(zs.size(0), -1, self.odim) + # (B, Lmax//r, r) -> (B, Lmax//r * r) + logits = self.prob_out(zs).view(zs.size(0), -1) + # postnet -> (B, Lmax//r * r, odim) + if self.postnet is None: + after_outs = before_outs + else: + after_outs = before_outs + self.postnet( + before_outs.transpose(1, 2) + ).transpose(1, 2) + + return before_outs, after_outs, logits + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + self.num_updates = num_updates diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/speech_decoder_prenet.py b/SpeechT5/SpeechT5/speecht5/models/modules/speech_decoder_prenet.py new file mode 100644 index 0000000000000000000000000000000000000000..bd89584606701f47c1882b67b17afa0f3d80207c --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/speech_decoder_prenet.py @@ -0,0 +1,110 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import contextlib +import torch +import torch.nn as nn + +import torch.nn.functional as F +from espnet.nets.pytorch_backend.tacotron2.decoder import Prenet as TacotronDecoderPrenet +from espnet.nets.pytorch_backend.transformer.embedding import PositionalEncoding +from espnet.nets.pytorch_backend.transformer.embedding import ScaledPositionalEncoding +from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask + + +class SpeechDecoderPrenet(nn.Module): + """ + + Args: + in_channels (int): the number of input channels + mid_channels (int): the number of intermediate channels + out_channels (int): the number of output channels + kernel_sizes (List[int]): the kernel size for each convolutional layer + """ + + def __init__( + self, + odim, + args, + ): + super(SpeechDecoderPrenet, self).__init__() + # define decoder prenet + if args.dprenet_layers != 0: + # decoder prenet + decoder_input_layer = torch.nn.Sequential( + TacotronDecoderPrenet( + idim=odim, + n_layers=args.dprenet_layers, + n_units=args.dprenet_units, + dropout_rate=args.dprenet_dropout_rate, + ), + torch.nn.Linear(args.dprenet_units, args.decoder_embed_dim), + ) 
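+            # When dprenet_layers == 0, the Tacotron-style prenet above is skipped
+            # and the "linear" branch below builds a plain linear projection with
+            # layer norm, dropout and ReLU instead.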
+ else: + decoder_input_layer = "linear" + + pos_enc_class = ( + ScaledPositionalEncoding if args.dec_use_scaled_pos_enc else PositionalEncoding + ) + + if decoder_input_layer == "linear": + self.decoder_prenet = torch.nn.Sequential( + torch.nn.Linear(odim, args.decoder_embed_dim), + torch.nn.LayerNorm(args.decoder_embed_dim), + torch.nn.Dropout(args.transformer_dec_dropout_rate), + torch.nn.ReLU(), + pos_enc_class(args.decoder_embed_dim, args.transformer_dec_positional_dropout_rate), + ) + elif isinstance(decoder_input_layer, torch.nn.Module): + self.decoder_prenet = torch.nn.Sequential( + decoder_input_layer, pos_enc_class(args.decoder_embed_dim, args.transformer_dec_positional_dropout_rate, max_len=args.max_speech_positions) + ) + + if args.spk_embed_integration_type == 'pre': + self.spkembs_layer = torch.nn.Sequential( + torch.nn.Linear(args.spk_embed_dim + args.decoder_embed_dim, args.decoder_embed_dim), torch.nn.ReLU() + ) + self.num_updates = 0 + self.freeze_decoder_updates = args.freeze_decoder_updates + + def forward(self, prev_output_tokens, tgt_lengths_in=None, spkembs=None): + ft = self.freeze_decoder_updates <= self.num_updates + with torch.no_grad() if not ft else contextlib.ExitStack(): + prev_output_tokens = self.decoder_prenet(prev_output_tokens) + + if spkembs is not None: + spkembs = F.normalize(spkembs).unsqueeze(1).expand(-1, prev_output_tokens.size(1), -1) + prev_output_tokens = self.spkembs_layer(torch.cat([prev_output_tokens, spkembs], dim=-1)) + + if tgt_lengths_in is not None: + tgt_frames_mask = ~(self._source_mask(tgt_lengths_in).squeeze(1)) + else: + tgt_frames_mask = None + return prev_output_tokens, tgt_frames_mask + + def _source_mask(self, ilens): + """Make masks for self-attention. + Args: + ilens (LongTensor or List): Batch of lengths (B,). + Returns: + Tensor: Mask tensor for self-attention. 
+ dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (including 1.2) + Examples: + >>> ilens = [5, 3] + >>> self._source_mask(ilens) + tensor([[[1, 1, 1, 1, 1], + [[1, 1, 1, 0, 0]]], dtype=torch.uint8) + """ + x_masks = make_non_pad_mask(ilens).to(next(self.parameters()).device) + return x_masks.unsqueeze(-2) + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + self.num_updates = num_updates diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/speech_encoder_postnet.py b/SpeechT5/SpeechT5/speecht5/models/modules/speech_encoder_postnet.py new file mode 100644 index 0000000000000000000000000000000000000000..ae8371bcb2c01065636e078962249c7fd1f968f8 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/speech_encoder_postnet.py @@ -0,0 +1,124 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import logging +import torch.nn as nn +import torch + + +logger = logging.getLogger(__name__) + +class SpeechEncoderPostnet(nn.Module): + """ + + Args: + in_channels (int): the number of input channels + mid_channels (int): the number of intermediate channels + out_channels (int): the number of output channels + kernel_sizes (List[int]): the kernel size for each convolutional layer + """ + + def __init__(self, dictionaries, args): + super(SpeechEncoderPostnet, self).__init__() + # modules below are not needed during fine-tuning + self.target_glu = args.target_glu + self.skip_masked = args.skip_masked + self.skip_nomask = args.skip_nomask + self.logit_temp = args.logit_temp + + final_dim = ( + args.final_dim if args.final_dim > 0 else args.encoder_embed_dim + ) + if any([d is None for d in dictionaries]): + logger.info( + "cannot find dictionary. 
assume will be used for fine-tuning" + ) + else: + self.num_classes = [len(d) for d in dictionaries] + self.label_embs_concat = nn.Parameter( + torch.FloatTensor(sum(self.num_classes), final_dim) + ) + nn.init.uniform_(self.label_embs_concat) + self.untie_final_proj = args.untie_final_proj + if self.untie_final_proj: + self.final_proj = nn.Linear( + args.encoder_embed_dim, final_dim * len(dictionaries) + ) + else: + self.final_proj = nn.Linear(args.encoder_embed_dim, final_dim) + + def compute_nce(self, x, pos, negs): + neg_is_pos = (pos == negs).all(-1) + pos = pos.unsqueeze(0) + targets = torch.cat([pos, negs], dim=0) + + logits = torch.cosine_similarity( + x.float(), targets.float(), dim=-1 + ).type_as(x) + logits /= self.logit_temp + if neg_is_pos.any(): + logits[1:][neg_is_pos] = float("-inf") + logits = logits.transpose(0, 1) # (num_x, num_cls+1) + return logits + + def forward(self, x, padding_mask, mask_indices, target_list): + def compute_pred(proj_x, target, label_embs): + # compute logits for the i-th label set + y = torch.index_select(label_embs, 0, target.long()) + negs = label_embs.unsqueeze(1).expand(-1, proj_x.size(0), -1) + if self.target_glu: + y = self.target_glu(y) + negs = self.target_glu(negs) + # proj_x: (S, D) + # y: (S, D) + # negs: (Neg, S, D) + return self.compute_nce(proj_x, y, negs) + + label_embs_list = self.label_embs_concat.split(self.num_classes, 0) + + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + proj_x_m = self.final_proj(x[masked_indices]) + if self.untie_final_proj: + proj_x_m_list = proj_x_m.chunk(len(target_list), dim=-1) + else: + proj_x_m_list = [proj_x_m for _ in range(len(target_list))] + logit_m_list = [ + compute_pred(proj_x_m, t[masked_indices], label_embs_list[i]) + for i, (proj_x_m, t) in enumerate( + zip(proj_x_m_list, target_list) + ) + ] + else: + logit_m_list = [None for _ in target_list] + + if not self.skip_nomask: + nomask_indices = torch.logical_and(~padding_mask, ~mask_indices) + proj_x_u = self.final_proj(x[nomask_indices]) + if self.untie_final_proj: + proj_x_u_list = proj_x_u.chunk(len(target_list), dim=-1) + else: + proj_x_u_list = [proj_x_u for _ in range(len(target_list))] + + logit_u_list = [ + compute_pred(proj_x_u, t[nomask_indices], label_embs_list[i]) + for i, (proj_x_u, t) in enumerate( + zip(proj_x_u_list, target_list) + ) + ] + else: + logit_u_list = [None for _ in target_list] + + result = { + "logit_m_list": logit_m_list, + "logit_u_list": logit_u_list, + "padding_mask": padding_mask, + } + + return result diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/speech_encoder_prenet.py b/SpeechT5/SpeechT5/speecht5/models/modules/speech_encoder_prenet.py new file mode 100644 index 0000000000000000000000000000000000000000..89e4a7d5a9b0cb50ed3d99aa54a7e3729a6cf67e --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/speech_encoder_prenet.py @@ -0,0 +1,374 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import logging +import math +import torch +import contextlib +from typing import List, Tuple 
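+
+# The prenet defined below is a HuBERT-style front end: a 1-D convolutional
+# feature extractor over the raw waveform, an optional projection to the
+# encoder dimension, convolutional/sinusoidal positional embeddings, and
+# time/channel masking for masked prediction during pre-training.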
+import torch.nn as nn + +from fairseq.data.data_utils import lengths_to_padding_mask +from fairseq.data.data_utils import compute_mask_indices +from fairseq.modules import ( + PositionalEmbedding, + Fp32GroupNorm, + FairseqDropout, + SamePad, + GradMultiply, + LayerNorm, + Fp32LayerNorm, + TransposeLast, +) +import numpy as np + +logger = logging.getLogger(__name__) + + +class LinearLayer(nn.Module): + def __init__(self, idim, odom, dropout=0): + super(LinearLayer, self).__init__() + self.linear = nn.Sequential( + nn.Linear(idim, odom), + nn.LayerNorm(odom), + nn.Dropout(dropout), + nn.ReLU(), + ) + + def get_out_seq_lens_tensor(self, in_seq_lens_tensor): + out = in_seq_lens_tensor.clone() + return out + + def forward(self, src_tokens, src_lengths): + """ + src_tokens: [B, T, C] + src_lengths: [B] + """ + x = self.linear(src_tokens) + x = x.transpose(0, 1).contiguous() # -> T x B x C + return x, src_lengths + + +class SpeechEncoderPrenet(nn.Module): + """ + + Args: + in_channels (int): the number of input channels + mid_channels (int): the number of intermediate channels + out_channels (int): the number of output channels + kernel_sizes (List[int]): the kernel size for each convolutional layer + """ + + def __init__(self, args): + super(SpeechEncoderPrenet, self).__init__() + self.dropout_module = FairseqDropout( + p=args.dropout, module_name=self.__class__.__name__ + ) + self.embed_scale = math.sqrt(args.encoder_embed_dim) + if args.no_scale_embedding: + self.embed_scale = 1.0 + self.padding_idx = 1 + self.freeze_encoder_updates = args.freeze_encoder_updates + self.num_updates = 0 + assert args.encoder_speech_prenet in ["conv", "linear"], args.encoder_speech_prenet + feature_enc_layers = eval(args.conv_feature_layers) # noqa + self.embed = feature_enc_layers[-1][0] + + self.feature_extractor = ConvFeatureExtractionModel( + conv_layers=feature_enc_layers, + dropout=0.0, + mode=args.extractor_mode, + conv_bias=args.conv_bias, + ) + feature_ds_rate = np.prod([s for _, _, s in feature_enc_layers]) + self.feat2tar_ratio = ( + args.label_rates * feature_ds_rate / args.sample_rate + ) + + self.post_extract_proj = ( + nn.Linear(self.embed, args.encoder_embed_dim) + if self.embed != args.encoder_embed_dim + else None + ) + + self.use_conv_pos = args.use_conv_pos + self.use_sinc_pos = args.use_sinc_pos + self.use_abs_pos = getattr(args, "use_abs_pos", False) + + self.feature_grad_mult = args.feature_grad_mult + if self.use_conv_pos: + self.layer_norm = LayerNorm(self.embed) + self.pos_conv = nn.Conv1d( + args.encoder_embed_dim, + args.encoder_embed_dim, + kernel_size=args.conv_pos, + padding=args.conv_pos // 2, + groups=args.conv_pos_groups, + ) + dropout = 0 + std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * args.encoder_embed_dim)) + nn.init.normal_(self.pos_conv.weight, mean=0, std=std) + nn.init.constant_(self.pos_conv.bias, 0) + self.pos_conv = nn.utils.weight_norm(self.pos_conv, name="weight", dim=2) + self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU()) + + assert not (self.use_sinc_pos and self.use_abs_pos), f"sinc pos: {self.use_sinc_pos} abs pos: {self.use_abs_pos}" + if self.use_sinc_pos: + self.embed_positions = PositionalEmbedding( + args.max_speech_positions, args.encoder_embed_dim, self.padding_idx + ) + if self.use_abs_pos: + self.embed_positions = PositionalEmbedding( + args.max_speech_positions, args.encoder_embed_dim, self.padding_idx, learned=True + ) + + # Hubert + self.mask_prob = args.mask_prob + self.mask_selection = args.mask_selection + 
self.mask_other = args.mask_other + self.hubert_mask_length = args.hubert_mask_length + self.no_mask_overlap = args.no_mask_overlap + self.mask_min_space = args.mask_min_space + + self.mask_channel_prob = args.mask_channel_prob + self.mask_channel_selection = args.mask_channel_selection + self.mask_channel_other = args.mask_channel_other + self.mask_channel_length = args.mask_channel_length + self.no_mask_channel_overlap = args.no_mask_channel_overlap + self.mask_channel_min_space = args.mask_channel_min_space + + self.mask_emb = nn.Parameter( + torch.FloatTensor(args.encoder_embed_dim).uniform_() + ) + + def forward(self, src_tokens, require_feat_pen=False, target_list=None, padding_mask=None, mask=True): + ft = self.freeze_encoder_updates <= self.num_updates + with torch.no_grad() if not ft else contextlib.ExitStack(): + return self._forward(src_tokens, require_feat_pen, target_list, padding_mask, mask) + + def _forward(self, src_tokens, require_feat_pen=False, target_list=None, padding_mask=None, mask=True): + if self.feature_grad_mult > 0: + x = self.feature_extractor(src_tokens) + x = x.transpose(1, 2).transpose(0, 1) # [length, batch, hidden_size] + if self.feature_grad_mult != 1.0: + x = GradMultiply.apply(x, self.feature_grad_mult) + else: + with torch.no_grad(): + x = self.feature_extractor(src_tokens) + x = x.transpose(1, 2).transpose(0, 1) # [length, batch, hidden_size] + x = x.transpose(0, 1) # [batch, length, hidden_size] + + encoder_padding_mask = padding_mask + + x = x.transpose(1, 2) # [batch, hidden_size, length] + if target_list is not None: + x, target_list = self.forward_targets(x, target_list) + features_pen = x.float().pow(2).mean() + x = x.transpose(1, 2) # [batch, length, hidden_size] + x = self.layer_norm(x) + encoder_padding_mask = self.forward_padding_mask(x, encoder_padding_mask) + if self.post_extract_proj is not None: + x = self.post_extract_proj(x) + x = self.dropout_module(x) + if mask: + x, mask_indices = self.apply_hubert_mask( + x, encoder_padding_mask + ) + else: + x = x + mask_indices = None + + if self.use_conv_pos: + positions = self.pos_conv(x.transpose(1, 2)) + positions = positions.transpose(1, 2) + #else: + # positions = self.embed_positions(encoder_padding_mask) + x = x + positions + + if self.use_sinc_pos: + positions = self.embed_positions(encoder_padding_mask) + x = x + positions + + # x = self.dropout_module(x) + + if require_feat_pen: + return (x, features_pen, mask_indices, target_list), encoder_padding_mask + else: + # For consistence with encoder + return x, encoder_padding_mask + + def forward_targets( + self, features: torch.Tensor, target_list: List[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Trim features to ensure labels exist and then get aligned labels + feat_tsz = features.size(2) + targ_tsz = min([t.size(1) for t in target_list]) + if self.feat2tar_ratio * feat_tsz > targ_tsz: + feat_tsz = int(targ_tsz / self.feat2tar_ratio) + features = features[..., :feat_tsz] + target_inds = torch.arange(feat_tsz).float() * self.feat2tar_ratio + target_list = [t[:, target_inds.long()] for t in target_list] + return features, target_list + + def forward_padding_mask( + self, features: torch.Tensor, padding_mask: torch.Tensor, + ) -> torch.Tensor: + extra = padding_mask.size(1) % features.size(1) + if extra > 0: + padding_mask = padding_mask[:, :-extra] + padding_mask = padding_mask.view( + padding_mask.size(0), features.size(1), -1 + ) + padding_mask = padding_mask.all(-1) + return padding_mask + + def get_src_lengths(self, 
src_lengths): + return self.feature_extractor.get_out_seq_lens_tensor(src_lengths) + + def apply_hubert_mask(self, x, padding_mask): + B, T, C = x.shape + if self.mask_prob > 0: + mask_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob, + self.hubert_mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x[mask_indices] = self.mask_emb + else: + mask_indices = None + + if self.mask_channel_prob > 0: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x[mask_channel_indices] = 0 + + return x, mask_indices + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + self.num_updates = num_updates + +class ConvFeatureExtractionModel(nn.Module): + def __init__( + self, + conv_layers: List[Tuple[int, int, int]], + dropout: float = 0.0, + mode: str = "default", + conv_bias: bool = False, + ): + super().__init__() + + assert mode in {"default", "layer_norm"} + + def block( + n_in, + n_out, + k, + stride, + is_layer_norm=False, + is_group_norm=False, + conv_bias=False, + ): + def make_conv(): + conv = nn.Conv1d(n_in, n_out, k, stride=stride, bias=conv_bias) + nn.init.kaiming_normal_(conv.weight) + return conv + + assert ( + is_layer_norm and is_group_norm + ) == False, "layer norm and group norm are exclusive" + + if is_layer_norm: + return nn.Sequential( + make_conv(), + nn.Dropout(p=dropout), + nn.Sequential( + TransposeLast(), + Fp32LayerNorm(dim, elementwise_affine=True), + TransposeLast(), + ), + nn.GELU(), + ) + elif is_group_norm: + return nn.Sequential( + make_conv(), + nn.Dropout(p=dropout), + Fp32GroupNorm(dim, dim, affine=True), + nn.GELU(), + ) + else: + return nn.Sequential(make_conv(), nn.Dropout(p=dropout), nn.GELU()) + + in_d = 1 + self.conv_layers = nn.ModuleList() + self.conv_layers_infos = conv_layers + for i, cl in enumerate(conv_layers): + assert len(cl) == 3, "invalid conv definition: " + str(cl) + (dim, k, stride) = cl + + self.conv_layers.append( + block( + in_d, + dim, + k, + stride, + is_layer_norm=mode == "layer_norm", + is_group_norm=mode == "default" and i == 0, + conv_bias=conv_bias, + ) + ) + in_d = dim + + def forward(self, x): + # BxT -> BxCxT + x = x.unsqueeze(1) + for conv in self.conv_layers: + x = conv(x) + return x + + def get_out_seq_lens_nonmask_after_a_layer(self, in_seq_lens_tensor, i): + """Returns the out_seq_lens_nonmask 0/1 tensor after a layer. 
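+
+        For a convolution with kernel size k and stride s, the output length is
+        floor((L_in - (k - 1) - 1) / s + 1); the resulting lengths are then
+        turned into a 0/1 non-padding mask.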
+ + Args: + in_seq_lens_tensor (LongTensor): length + + Returns: + LongTensor: length + """ + out_lengths = in_seq_lens_tensor.clone() + out_lengths = ((out_lengths.float() - (self.conv_layers_infos[i][1] - 1) - 1) / self.conv_layers_infos[i][-1] + 1).floor().long() + out_nonmask = (~lengths_to_padding_mask(out_lengths)).float() + return out_nonmask, out_lengths + + def get_out_seq_lens_tensor(self, in_seq_lens_tensor): + out = in_seq_lens_tensor.clone() + for i in range(len(self.conv_layers)): + out = ((out.float() - (self.conv_layers_infos[i][1] - 1) - 1) / self.conv_layers_infos[i][-1] + 1).floor().long() + return out diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/text_decoder_postnet.py b/SpeechT5/SpeechT5/speecht5/models/modules/text_decoder_postnet.py new file mode 100644 index 0000000000000000000000000000000000000000..f9230352196accfdd40891bb1a844b3740c8253c --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/text_decoder_postnet.py @@ -0,0 +1,93 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import torch.nn as nn +import torch +import contextlib + +from fairseq import utils +from fairseq.modules import ( + AdaptiveSoftmax, +) + +class TextDecoderPostnet(nn.Module): + """ + + Args: + in_channels (int): the number of input channels + mid_channels (int): the number of intermediate channels + out_channels (int): the number of output channels + kernel_sizes (List[int]): the kernel size for each convolutional layer + """ + + def __init__(self, embed_tokens, dictionary, args, output_projection=None,): + super(TextDecoderPostnet, self).__init__() + self.output_embed_dim = args.decoder_output_dim + self.output_projection = output_projection + self.adaptive_softmax = None + self.share_input_output_embed = args.share_input_output_embed + if self.output_projection is None: + self.build_output_projection(args, dictionary, embed_tokens) + self.freeze_decoder_updates = args.freeze_decoder_updates + self.num_updates = 0 + + def output_layer(self, features): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + return self.output_projection(features) + else: + return features + + def build_output_projection(self, args, dictionary, embed_tokens): + if args.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + utils.eval_str_list(args.adaptive_softmax_cutoff, type=int), + dropout=args.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if args.tie_adaptive_weights else None, + factor=args.adaptive_softmax_factor, + tie_proj=args.tie_adaptive_proj, + ) + elif self.share_input_output_embed: + self.output_projection = nn.Linear( + embed_tokens.weight.shape[1], + embed_tokens.weight.shape[0], + bias=False, + ) + self.output_projection.weight = embed_tokens.weight + else: + self.output_projection = nn.Linear( + self.output_embed_dim, len(dictionary), bias=False + ) + nn.init.normal_( + self.output_projection.weight, mean=0, std=self.output_embed_dim ** -0.5 + ) + # 
num_base_layers = getattr(args, "base_layers", 0) + # for i in range(num_base_layers): + # self.layers.insert( + # ((i + 1) * args.decoder_layers) // (num_base_layers + 1), + # BaseLayer(args), + # ) + + def forward(self, x): + ft = self.freeze_decoder_updates <= self.num_updates + with torch.no_grad() if not ft else contextlib.ExitStack(): + return self._forward(x) + + def _forward(self, x): + # embed positions + x = self.output_layer(x) + + return x + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + self.num_updates = num_updates diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/text_decoder_prenet.py b/SpeechT5/SpeechT5/speecht5/models/modules/text_decoder_prenet.py new file mode 100644 index 0000000000000000000000000000000000000000..9b9dcd6b53e4501bcdd8690a1a3d972f167f3d67 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/text_decoder_prenet.py @@ -0,0 +1,129 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import math +import torch.nn as nn +import torch +import contextlib + +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from fairseq.models.transformer import Linear +from fairseq.modules import ( + PositionalEmbedding, + FairseqDropout, + LayerNorm +) + + +class TextDecoderPrenet(nn.Module): + """ + + Args: + in_channels (int): the number of input channels + mid_channels (int): the number of intermediate channels + out_channels (int): the number of output channels + kernel_sizes (List[int]): the kernel size for each convolutional layer + """ + + def __init__(self, embed_tokens, args): + super(TextDecoderPrenet, self).__init__() + self.dropout_module = FairseqDropout( + args.dropout, module_name=self.__class__.__name__ + ) + self.decoder_layerdrop = args.decoder_layerdrop + self.num_updates = 0 + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = args.decoder_embed_dim + self.embed_dim = embed_dim + self.output_embed_dim = args.decoder_output_dim + + self.padding_idx = embed_tokens.padding_idx + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim) + + if not args.adaptive_input and args.quant_noise_pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + args.quant_noise_pq, + args.quant_noise_pq_block_size, + ) + else: + self.quant_noise = None + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + self.embed_positions = ( + PositionalEmbedding( + args.max_text_positions, + embed_dim, + self.padding_idx, + learned=args.decoder_learned_pos, + ) + if not args.no_token_positional_embeddings + else None + ) + export = getattr(args, "export", False) + if getattr(args, "layernorm_embedding", False): + self.layernorm_embedding = LayerNorm(embed_dim, export=export) + else: + self.layernorm_embedding = None + + self.freeze_decoder_updates = args.freeze_decoder_updates + + def forward(self, prev_output_tokens, incremental_state=None): + ft = 
self.freeze_decoder_updates <= self.num_updates + with torch.no_grad() if not ft else contextlib.ExitStack(): + return self._forward(prev_output_tokens, incremental_state) + + def _forward(self, prev_output_tokens, incremental_state=None): + if prev_output_tokens.eq(self.padding_idx).any(): + x_mask = prev_output_tokens.eq(self.padding_idx) + else: + x_mask = None + + # embed positions + positions = None + if self.embed_positions is not None: + positions = self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.quant_noise is not None: + x = self.quant_noise(x) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + + x = self.dropout_module(x) + + return x, x_mask, incremental_state + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + self.num_updates = num_updates diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/text_encoder_prenet.py b/SpeechT5/SpeechT5/speecht5/models/modules/text_encoder_prenet.py new file mode 100644 index 0000000000000000000000000000000000000000..466e6493c002e2043f045f31c9c2d7f9712fb5ef --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/text_encoder_prenet.py @@ -0,0 +1,45 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import torch.nn as nn + +from espnet.nets.pytorch_backend.transformer.embedding import PositionalEncoding +from espnet.nets.pytorch_backend.transformer.embedding import ScaledPositionalEncoding + + +class TextEncoderPrenet(nn.Module): + """ + + Args: + in_channels (int): the number of input channels + mid_channels (int): the number of intermediate channels + out_channels (int): the number of output channels + kernel_sizes (List[int]): the kernel size for each convolutional layer + """ + + def __init__( + self, + embed_tokens, + args, + ): + super(TextEncoderPrenet, self).__init__() + self.padding_idx = embed_tokens.padding_idx + # define encoder prenet + # get positional encoding class + pos_enc_class = ( + ScaledPositionalEncoding if args.enc_use_scaled_pos_enc else PositionalEncoding + ) + + self.encoder_prenet = nn.Sequential( + embed_tokens, + pos_enc_class(args.encoder_embed_dim, args.transformer_enc_positional_dropout_rate, max_len=args.max_text_positions), + ) + + def forward(self, src_tokens): + return self.encoder_prenet(src_tokens), src_tokens.eq(self.padding_idx) diff --git a/SpeechT5/SpeechT5/speecht5/models/modules/transformer_layer.py b/SpeechT5/SpeechT5/speecht5/models/modules/transformer_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..3bdc0ba9fd0cc889e6934a53681f444519eb42ac --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/modules/transformer_layer.py @@ 
-0,0 +1,411 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +from typing import Dict, List, Optional + +import torch +import torch.nn as nn +import contextlib +from fairseq import utils +from fairseq.modules import LayerNorm +from .multihead_attention import MultiheadAttention +from fairseq.modules.fairseq_dropout import FairseqDropout +from fairseq.modules.quant_noise import quant_noise +from torch import Tensor + + +class TransformerSentenceEncoderLayer(nn.Module): + """ + Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained + models. + """ + + def __init__( + self, + embedding_dim: float = 768, + ffn_embedding_dim: float = 3072, + num_attention_heads: float = 8, + dropout: float = 0.1, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + activation_fn: str = "relu", + layer_norm_first: bool = False, + has_relative_attention_bias: bool = False, + ) -> None: + + super().__init__() + # Initialize parameters + self.embedding_dim = embedding_dim + self.dropout = dropout + self.activation_dropout = activation_dropout + + # Initialize blocks + self.activation_fn = utils.get_activation_fn(activation_fn) + self.self_attn = MultiheadAttention( + self.embedding_dim, + num_attention_heads, + dropout=attention_dropout, + self_attention=True, + has_relative_attention_bias=has_relative_attention_bias, + ) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(self.activation_dropout) + self.dropout3 = nn.Dropout(dropout) + + self.layer_norm_first = layer_norm_first + + # layer norm associated with the self attention layer + self.self_attn_layer_norm = LayerNorm(self.embedding_dim) + self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim) + self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim) + + # layer norm associated with the position wise feed-forward NN + self.final_layer_norm = LayerNorm(self.embedding_dim) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embedding_dim//num_attention_heads) + + def forward( + self, + x: torch.Tensor, + self_attn_mask: torch.Tensor = None, + self_attn_padding_mask: torch.Tensor = None, + need_weights: bool = False, + att_args=None, + pos_bias=None, + ): + """ + LayerNorm is applied either before or after the self-attention/ffn + modules similar to the original Transformer imlementation. 
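+
+        With layer_norm_first=True the layer is pre-norm (LayerNorm applied
+        before self-attention and before the feed-forward block); otherwise it
+        is post-norm (LayerNorm applied after each residual connection), as in
+        the two branches below.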
+ """ + residual = x + + if self.layer_norm_first: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + x = self.dropout1(x) + x = residual + x + + residual = x + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + else: + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + position_bias=pos_bias, + ) + + x = self.dropout1(x) + x = residual + x + + x = self.self_attn_layer_norm(x) + + residual = x + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + x = self.final_layer_norm(x) + + return x, attn + + +class TransformerDecoderLayer(nn.Module): + """Decoder layer block. + + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *args.decoder_normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, args, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False, has_relative_attention_bias=False + ): + super().__init__() + self.embed_dim = args.decoder_embed_dim + self.num_updates = 0 + self.dropout_module = FairseqDropout( + args.dropout, module_name=self.__class__.__name__ + ) + self.quant_noise = getattr(args, "quant_noise_pq", 0) + self.quant_noise_block_size = getattr(args, "quant_noise_pq_block_size", 8) + + self.cross_self_attention = getattr(args, "cross_self_attention", False) + + self.freeze_decoder_updates = getattr(args, "freeze_decoder_updates", 0) + + self.self_attn = self.build_self_attention( + self.embed_dim, + args, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + ) + + self.activation_fn = utils.get_activation_fn( + activation=str(args.activation_fn) + if getattr(args, "activation_fn", None) is not None + else "relu" + ) + activation_dropout_p = getattr(args, "activation_dropout", 0) or 0 + if activation_dropout_p == 0: + # for backwards compatibility with models that use args.relu_dropout + activation_dropout_p = getattr(args, "relu_dropout", 0) or 0 + self.activation_dropout_module = FairseqDropout( + float(activation_dropout_p), module_name=self.__class__.__name__ + ) + self.normalize_before = args.decoder_normalize_before + + export = getattr(args, "export", False) + self.self_attn_layer_norm = LayerNorm(self.embed_dim, export=export) + + if no_encoder_attn: + self.encoder_attn = None + self.encoder_attn_layer_norm = None + else: + self.encoder_attn = self.build_encoder_attention(self.embed_dim, args) + self.encoder_attn_layer_norm = LayerNorm(self.embed_dim, export=export) + + self.fc1 = self.build_fc1( + self.embed_dim, + args.decoder_ffn_embed_dim, + self.quant_noise, + self.quant_noise_block_size, + ) + self.fc2 = self.build_fc2( + args.decoder_ffn_embed_dim, + self.embed_dim, + self.quant_noise, + 
self.quant_noise_block_size, + ) + + self.final_layer_norm = LayerNorm(self.embed_dim, export=export) + self.need_attn = True + + self.onnx_trace = False + + self.has_relative_attention_bias = has_relative_attention_bias + if self.has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim//args.decoder_attention_heads) + + def build_fc1(self, input_dim, output_dim, q_noise, qn_block_size): + return quant_noise(nn.Linear(input_dim, output_dim), q_noise, qn_block_size) + + def build_fc2(self, input_dim, output_dim, q_noise, qn_block_size): + return quant_noise(nn.Linear(input_dim, output_dim), q_noise, qn_block_size) + + def build_self_attention( + self, embed_dim, args, add_bias_kv=False, add_zero_attn=False + ): + return MultiheadAttention( + embed_dim, + args.decoder_attention_heads, + dropout=args.attention_dropout, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=not getattr(args, "cross_self_attention", False), + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + #has_relative_attention_bias=args.has_relative_attention_bias, + ) + + def build_encoder_attention(self, embed_dim, args): + return MultiheadAttention( + embed_dim, + args.decoder_attention_heads, + kdim=getattr(args, "encoder_embed_dim", None), + vdim=getattr(args, "encoder_embed_dim", None), + dropout=args.attention_dropout, + encoder_decoder_attention=True, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + ) + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def residual_connection(self, x, residual): + return residual + x + + def forward( + self, + x, + encoder_out: Optional[torch.Tensor] = None, + encoder_padding_mask: Optional[torch.Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + prev_self_attn_state: Optional[List[torch.Tensor]] = None, + prev_attn_state: Optional[List[torch.Tensor]] = None, + self_attn_mask: Optional[torch.Tensor] = None, + self_attn_padding_mask: Optional[torch.Tensor] = None, + need_attn: bool = False, + need_head_weights: bool = False, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor, optional): binary + ByteTensor of shape `(batch, src_len)` where padding + elements are indicated by ``1``. + need_attn (bool, optional): return attention weights + need_head_weights (bool, optional): return attention weights + for each head (default: return average over heads). 
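+            pos_bias (Tensor, optional): relative position encoding; when given,
+                it is forwarded to the self-attention as position_bias to add a
+                relative-position term to the attention weights.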
+ + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + ft = self.freeze_decoder_updates <= self.num_updates + + with torch.no_grad() if not ft else contextlib.ExitStack(): + if need_head_weights: + need_attn = True + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + if prev_self_attn_state is not None: + prev_key, prev_value = prev_self_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_self_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_self_attn_state[2] + assert incremental_state is not None + self.self_attn._set_input_buffer(incremental_state, saved_state) + _self_attn_input_buffer = self.self_attn._get_input_buffer(incremental_state) + if self.cross_self_attention and not ( + incremental_state is not None + and _self_attn_input_buffer is not None + and "prev_key" in _self_attn_input_buffer + ): + if self_attn_mask is not None: + assert encoder_out is not None + self_attn_mask = torch.cat( + (x.new_zeros(x.size(0), encoder_out.size(0)), self_attn_mask), dim=1 + ) + if self_attn_padding_mask is not None: + if encoder_padding_mask is None: + assert encoder_out is not None + encoder_padding_mask = self_attn_padding_mask.new_zeros( + encoder_out.size(1), encoder_out.size(0) + ) + self_attn_padding_mask = torch.cat( + (encoder_padding_mask, self_attn_padding_mask), dim=1 + ) + assert encoder_out is not None + y = torch.cat((encoder_out, x), dim=0) + else: + y = x + + x, attn = self.self_attn( + query=x, + key=y, + value=y, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + if self.encoder_attn is not None and encoder_out is not None: + residual = x + if self.normalize_before: + x = self.encoder_attn_layer_norm(x) + if prev_attn_state is not None: + prev_key, prev_value = prev_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_attn_state[2] + assert incremental_state is not None + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=need_attn or (not self.training and self.need_attn), + need_head_weights=need_head_weights, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.encoder_attn_layer_norm(x) + + with torch.no_grad() if not ft else contextlib.ExitStack(): + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + x = self.fc2(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + if self.onnx_trace and incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + assert saved_state is not None + if self_attn_padding_mask is not None: + self_attn_state = [ + saved_state["prev_key"], + 
saved_state["prev_value"], + saved_state["prev_key_padding_mask"], + ] + else: + self_attn_state = [saved_state["prev_key"], saved_state["prev_value"]] + return x, attn, self_attn_state + return x, attn, None + + def make_generation_fast_(self, need_attn: bool = False, **kwargs): + self.need_attn = need_attn + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + self.num_updates = num_updates diff --git a/SpeechT5/SpeechT5/speecht5/models/speecht5.py b/SpeechT5/SpeechT5/speecht5/models/speecht5.py new file mode 100644 index 0000000000000000000000000000000000000000..cb17131522344e7676bfcc4ceb3c053d17f9eb10 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/speecht5.py @@ -0,0 +1,1447 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import logging +from ast import literal_eval +from typing import Dict, List, Optional, Tuple + +import torch +import torch.nn.functional as F +from fairseq import utils +from fairseq.models import ( + FairseqEncoderDecoderModel, + FairseqIncrementalDecoder, + register_model, + register_model_architecture, +) +from .modules.text_encoder_prenet import TextEncoderPrenet +from .modules.text_decoder_prenet import TextDecoderPrenet +from .modules.text_decoder_postnet import TextDecoderPostnet +from .modules.speech_encoder_prenet import SpeechEncoderPrenet +from .modules.speech_encoder_postnet import SpeechEncoderPostnet +from .modules.speech_decoder_prenet import SpeechDecoderPrenet +from .modules.speech_decoder_postnet import SpeechDecoderPostnet +from .modules.speaker_decoder_postnet import SpeakerDecoderPostnet +from .modules.encoder import TransformerEncoder +from .modules.decoder import TransformerDecoder +from fairseq.modules.transformer_sentence_encoder import init_bert_params +from fairseq.models.transformer import Embedding +from fairseq.modules import ( + GumbelVectorQuantizer, +) +from torch import Tensor + + +logger = logging.getLogger(__name__) + +DEFAULT_MAX_TEXT_POSITIONS = 450 +DEFAULT_MAX_SPEECH_POSITIONS = 4000 + + +@register_model("t5_transformer") +class T5TransformerModel(FairseqEncoderDecoderModel): + """Adapted Transformer model (https://arxiv.org/abs/1706.03762) for + speech-to-text tasks. The Transformer encoder/decoder remains the same. 
+ A trainable input subsampler is prepended to the Transformer encoder to + project inputs into the encoder dimension as well as downsample input + sequence for computational efficiency.""" + + def __init__( + self, + args, + encoder, decoder, + text_encoder_prenet, speech_encoder_prenet, + text_decoder_prenet, speech_decoder_prenet, + text_decoder_postnet, speech_decoder_postnet, + speaker_decoder_postnet, speech_encoder_postnet, + ): + super().__init__(encoder, decoder) + + self.encoder = encoder + self.decoder = decoder + + self.text_encoder_prenet = text_encoder_prenet + self.speech_encoder_prenet = speech_encoder_prenet + + self.text_decoder_prenet = text_decoder_prenet + self.speech_decoder_prenet = speech_decoder_prenet + + self.text_decoder_postnet = text_decoder_postnet + self.speech_decoder_postnet = speech_decoder_postnet + self.speaker_decoder_postnet = speaker_decoder_postnet + + self.hubert_layer = speech_encoder_postnet + + self.reduction_factor = args.reduction_factor + self.spk_embed_dim = args.spk_embed_dim + # define projection layer + self.spk_embed_integration_type = args.spk_embed_integration_type + if self.spk_embed_dim is not None and self.spk_embed_integration_type != 'pre': + if self.spk_embed_integration_type == "add": + self.projection = torch.nn.Linear(self.spk_embed_dim, args.decoder_embed_dim) + else: + self.projection = torch.nn.Linear( + args.decoder_embed_dim + self.spk_embed_dim, args.decoder_embed_dim + ) + + self.use_codebook = args.use_codebook + self.codebook_prob = getattr(args, "codebook_prob", 0.5) # args.codebook_prob + if self.use_codebook: + vq_dim = args.latent_dim if args.latent_dim > 0 else args.encoder_embed_dim + self.quantizer = GumbelVectorQuantizer( + dim=args.encoder_embed_dim, + num_vars=args.latent_vars, + temp=args.latent_temp, + groups=args.latent_groups, + combine_groups=False, + vq_dim=vq_dim, + time_first=True, + weight_proj_depth=args.quantizer_depth, + weight_proj_factor=args.quantizer_factor, + ) + + self.num_updates = 0 + + # # Follow BERT's random weight initialization (for BART) + if args.bert_init: + self.apply(init_bert_params) + self.args = args + self.prune_modules(args.modules_filter) + + @staticmethod + def add_args(parser): + """Add model-specific arguments to the parser.""" + # Transformer + parser.add_argument( + "--activation-fn", + type=str, + choices=utils.get_available_activation_fns(), + help="activation function to use", + ) + parser.add_argument( + "--dropout", type=float, metavar="D", help="dropout probability" + ) + parser.add_argument( + "--attention-dropout", + type=float, + metavar="D", + help="dropout probability for attention weights", + ) + parser.add_argument( + "--activation-dropout", + "--relu-dropout", + type=float, + metavar="D", + help="dropout probability after activation in FFN.", + ) + parser.add_argument( + "--encoder-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension", + ) + parser.add_argument( + "--encoder-ffn-embed-dim", + type=int, + metavar="N", + help="encoder embedding dimension for FFN", + ) + parser.add_argument( + "--encoder-layers", type=int, metavar="N", help="num encoder layers" + ) + parser.add_argument( + "--encoder-attention-heads", + type=int, + metavar="N", + help="num encoder attention heads", + ) + parser.add_argument( + "--encoder-normalize-before", + action="store_true", + help="apply layernorm before each encoder block", + ) + parser.add_argument( + "--decoder-normalize-before", + action="store_true", + help="apply layernorm before each 
decoder block", + ) + parser.add_argument( + "--decoder-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension", + ) + parser.add_argument( + "--decoder-ffn-embed-dim", + type=int, + metavar="N", + help="decoder embedding dimension for FFN", + ) + parser.add_argument( + "--decoder-layers", type=int, metavar="N", help="num decoder layers" + ) + parser.add_argument( + "--decoder-attention-heads", + type=int, + metavar="N", + help="num decoder attention heads", + ) + parser.add_argument( + "--reduction-factor", + type=int, + help="reduction factor for decoder", + ) + parser.add_argument( + "--spk-embed-dim", + type=int, + help="speaker embedding dimension", + ) + parser.add_argument( + "--layernorm-embedding", + action="store_true", + help="add layernorm to embedding", + ) + parser.add_argument( + "--load-pretrained-encoder-from", + type=str, + metavar="STR", + help="model to take encoder weights from (for initialization)", + ) + parser.add_argument( + '--freeze-encoder-updates', + type=int, + help='number of steps to freeze encoder before finetune' + ) + parser.add_argument( + '--freeze-decoder-updates', + type=int, + help='number of steps to freeze decoder before finetune' + ) + parser.add_argument( + '--no-freeze-encoder-layer', + type=str, + help='which encoder layer not freeze during finetune' + ) + parser.add_argument( + "--share-input-output-embed", + action="store_true", + help="share decoder input and output embeddings", + ) + parser.add_argument( + "--share-ctc-embed", + action="store_true", + help="share ctc embed and decoder embed", + ) + parser.add_argument( + "--encoder-sliding-window-attn", + default=None, + type=int, + help="If not None but a even number, set sliding window attention to encoder's attn_mask, e.g., 4, 10, and 20", + ) + + # Convolutional subsampler + parser.add_argument( + "--encoder-speech-prenet", + default="conv", + type=str, + choices=["conv", "linear"], + help="The type of encoder speech prenet, e.g., conv or linear." + ) + parser.add_argument( + "--conv-kernel-sizes", + default="5,5", + type=str, + help="The layer of convolution of encoder speech prenet." + ) + parser.add_argument( + "--conv-channels", + default=1024, + type=int, + help="The channels of encoder speech prenet." + ) + parser.add_argument( + "--subsample-stride", + default="2,2", + type=str, + help="The subsample stride for conv1dsubsample." + ) + parser.add_argument( + "--spk-embed-integration-type", + type=str, + choices=["pre", "add"], + help="speaker embedding integration type" + ) + parser.add_argument( + "--dprenet-dropout-rate", + default=0.5, + type=float, + help="The dropout rate of decoder speech prenet." + ) + + ## SE + parser.add_argument( + "--se-predict", + default=None, + choices=["masking", "target", "delta"], + help="If set, source speech inputs decoder to predict the masking/target/delta of corresponding inputs." 
+ + "masking is [0, 1], target is predicted output, delta is difference between inputs and outputs", + ) + parser.add_argument( + "--se-decoder-input", + type=str, + default="previous_target", + choices=["previous_target", "source"], + ) + + ## SID + parser.add_argument( + "--modules-filter", + default=None, + type=str, + help="Remove unused modules for, e.g., SID.", + ) + parser.add_argument( + "--sid-pad-prenet", + action="store_true", + help="If set, the size of text dictionary is as small as for token.", + ) + parser.add_argument( + "--encoder-attn-branch", + type=str, + default="identity,full", + help="encoder attention branch sliding window, e.g., 'identity,0,2,4,full'", + ) + parser.add_argument( + "--encoder-block-branch", + type=str, + help="average the output of encoder, e.g., '4,5,6'", + ) + parser.add_argument( + "--sid-encoder-cls", + default=None, + choices=["encoder"], + help="If set, add cls vector to the encoder input, e.g., constant vector.", + ) + parser.add_argument( + "--sid-shuffle-encoder-input", + action="store_true", + help="If set, shuffle encoder input in time.", + ) + parser.add_argument( + "--sid-decoder-speaker", + action="store_true", + help="If set, apply speaker decoder as transformer decoder.", + ) + parser.add_argument( + "--sid-decoder-attn-dim", + default=128, + type=int, + help="Attention dimension in attensive statistics pooling of speaker decoder.", + ) + parser.add_argument( + "--sid-t5-postnet", + action="store_true", + help="If set, apply TextDecoderPostnet as speaker classification.", + ) + parser.add_argument( + "--sid-embed-dim", + default=128, + type=int, + help="Embedding dimension in speaker postnet for speaker identification if embed postnet.", + ) + parser.add_argument( + "--sid-pooling-layer", + default="decoder", + type=str, + choices=["decoder-las", "decoder", "encoder", "encoder-cls", "encoder-speaker"], + help="The output of decoder or encoder uses as SID pooling layer over temporal dimension.", + ) + parser.add_argument( + "--sid-no-pooling-bn", + action="store_true", + help="If set, not attention batchnorm.", + ) + parser.add_argument( + "--sid-no-embed-postnet", + action="store_true", + help="If set, no layer between decoder output and classification layer.", + ) + parser.add_argument( + "--sid-normalize-postnet", + action="store_true", + help="If set, normalize input and weight in postnet/classifier.", + ) + parser.add_argument( + "--sid-softmax-type", + default="softmax", + choices=["softmax", "amsoftmax", "aamsoftmax"], + help="If using amsoftmax or aamsoftmax, the target should be given.", + ) + parser.add_argument( + "--softmax-scale", + default=1.0, + type=float, + help="Scale for AMSoftmax or AAMSoftmax.", + ) + parser.add_argument( + "--softmax-margin", + default=0.0, + type=float, + help="Margin for AMSoftmax or AAMSoftmax.", + ) + parser.add_argument( + "--softmax-easy-margin", + action="store_true", + help="Enable easy margin for AAMSoftmax.", + ) + parser.add_argument( + "--encoder-layerdrop", + type=float, + metavar="D", + help="LayerDrop probability for encoder", + ) + parser.add_argument( + "--decoder-layerdrop", + type=float, + metavar="D", + help="LayerDrop probability for decoder", + ) + + ## Hubert + parser.add_argument( + '--feature-grad-mult', + type=float, + help='multiply feature extractor var grads by this' + ) + parser.add_argument( + '--logit-temp', + type=float, + help='temperature to divide logits by' + ) + parser.add_argument( + '--final-dim', + type=int, + help="project final representations and 
targets to this many " + "dimensions. set to encoder_embed_dim is <= 0" + ) + + # mask + parser.add_argument( + '--hubert-mask-length', + type=int, + help='mask length' + ) + parser.add_argument( + '--mask-prob', + type=float, + help='probability of replacing a token with mask' + ) + parser.add_argument( + "--mask-selection", + choices=["static", "uniform", "normal", "poisson"], + help="how to choose mask length", + ) + parser.add_argument( + '--mask-other', + type=float, + help="secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indices" + ) + parser.add_argument( + '--mask-min-space', + type=int, + help='min space between spans (if no overlap is enabled)' + ) + + # channel masking + parser.add_argument( + '--mask-channel-length', + type=int, + help='length of the mask for features (channels)' + ) + parser.add_argument( + '--mask-channel-prob', + type=float, + help="probability of replacing a feature with 0" + ) + parser.add_argument( + "--mask-channel-selection", + choices=["static", "uniform", "normal", "poisson"], + help="how to choose mask length for channel masking", + ) + parser.add_argument( + '--mask-channel-other', + type=float, + help="secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indices" + ) + parser.add_argument( + '--mask-channel-min-space', + type=int, + help='min space between spans (if no overlap is enabled)' + ) + + # abs positional embeddings + parser.add_argument( + '--conv-pos', + type=int, + help='number of filters for convolutional positional embeddings' + ) + parser.add_argument( + '--conv-pos-groups', + type=int, + help='number of groups for convolutional positional embedding' + ) + + # codebook related + parser.add_argument( + "--use-codebook", + action="store_true", + help="whether to use codebook", + ) + parser.add_argument( + "--codebook-prob", + type=float, + help="probability to use codebook", + ) + parser.add_argument( + "--latent-vars", + type=int, + help="number of latent variables V in each group of the codebook", + ) + parser.add_argument( + "--latent-groups", + type=int, + help="number of groups G of latent variables in the codebook", + ) + parser.add_argument( + "--latent-dim", + type=int, + help="if > 0, uses this dimensionality for latent variables. " + "otherwise uses final_dim / latent_groups", + ) + parser.add_argument( + "--latent-temp", + type=literal_eval, + help="temperature for latent variable sampling. 
" + "can be tuple of 3 values (start, end, decay)", + ) + parser.add_argument( + "--quantizer-depth", + type=int, + help="number of quantizer layers", + ) + parser.add_argument( + "--quantizer-factor", + type=int, + help="number of quantizer layers", + ) + parser.add_argument( + "--get-code-distribution", + action='store_true', + help="whether to get the code distribution (for test)", + ) + + # relative pos enc + parser.add_argument( + "--relative-position-embedding", + action='store_true', + help="whether to use relative position embedding", + ) + parser.add_argument( + "--num-buckets", + type=int, + default=320, + help="num of buckets for relative position embedding", + ) + parser.add_argument( + "--max-distance", + type=int, + default=1280, + help="max distance for relative position embedding", + ) + parser.add_argument( + "--encoder-max-relative-position", + type=int, + help="max distance for relative position embedding in encoder", + ) + parser.add_argument( + "--decoder-max-relative-position", + type=int, + help="max distance for relative position embedding in decoder", + ) + + # hubert feature extractor + parser.add_argument( + "--conv-feature-layers", + type=str, + help= "string describing convolutional feature extraction " + "layers in form of a python list that contains " + "[(dim, kernel_size, stride), ...]", + ) + parser.add_argument( + "--conv-bias", + action='store_true', + help="include bias in conv encoder", + ) + parser.add_argument( + "--extractor-mode", + choices=["default", "layer_norm"], + help="mode for feature extractor. default has a single group " + "norm with d groups in the first conv block, whereas layer_norm " + "has layer norms in every block (meant to use with normalize=True)" + ) + + # others + parser.add_argument( + "--bert-init", + action='store_true', + help="initilize as bert", + ) + parser.add_argument( + "--unb-enc-layer", + type=int, + default=-1, + help="which layer's output is used as the input of decoder", + ) + + # Encoder, Decoder + @classmethod + def build_encoder(cls, args, dictionary=None, embed_tokens=None): + return TransformerEncoder(args, dictionary, embed_tokens) + + @classmethod + def build_decoder(cls, args): + return TransformerDecoder(args) + + # Encoder Prenet + @classmethod + def build_text_encoder_prenet(cls, embed_tokens, args): + return TextEncoderPrenet(embed_tokens, args) + + @classmethod + def build_speech_encoder_prenet(cls, args): + return SpeechEncoderPrenet(args) + + # Decoder Prenet + @classmethod + def build_text_decoder_prenet(cls, embed_tokens, args): + return TextDecoderPrenet(embed_tokens, args) + + @classmethod + def build_speech_decoder_prenet(cls, odim, args): + return SpeechDecoderPrenet(odim, args) + + # Decoder Postnet + @classmethod + def build_text_decoder_postnet(cls, embed_tokens, dictionary, args): + return TextDecoderPostnet(embed_tokens, dictionary, args) + + @classmethod + def build_speaker_decoder_postnet(cls, embed_dim, class_num, args): + return SpeakerDecoderPostnet(embed_dim, class_num, args) + + @classmethod + def build_speech_decoder_postnet(cls, odim, args): + return SpeechDecoderPostnet(odim, args) + + @classmethod + def build_speech_encoder_postnet(cls, dictionaries, args): + return SpeechEncoderPostnet(dictionaries, args) + + @classmethod + def build_model(cls, args, task): + """Build a new model instance.""" + + # make sure all arguments are present in older models + base_architecture(args) + + def build_embedding(dictionary, embed_dim, max_num_embeddings=None): + num_embeddings = 
len(dictionary) + if max_num_embeddings is not None and isinstance(max_num_embeddings, int): + num_embeddings = min(num_embeddings, max_num_embeddings) + padding_idx = dictionary.pad() + return Embedding(num_embeddings, embed_dim, padding_idx) + + if hasattr(args, "sid_pad_prenet") and args.sid_pad_prenet: + max_num_embeddings = 3 # at index 2 + else: + max_num_embeddings = None + + text_decoder_embed_tokens = build_embedding( + task.dicts["text"], args.decoder_embed_dim, max_num_embeddings + ) + + if args.share_input_output_embed: + text_encoder_embed_tokens = text_decoder_embed_tokens + else: + text_encoder_embed_tokens = build_embedding( + task.dicts["text"], args.encoder_embed_dim + ) + + speech_odim = args.speech_odim + if "text" in task.dicts: + encoder = cls.build_encoder(args, task.dicts["text"], text_encoder_embed_tokens) + else: + encoder = cls.build_encoder(args) + decoder = cls.build_decoder(args) + + text_encoder_prenet = cls.build_text_encoder_prenet(text_encoder_embed_tokens, args) + speech_encoder_prenet = cls.build_speech_encoder_prenet(args) + + text_decoder_prenet = cls.build_text_decoder_prenet(text_decoder_embed_tokens, args) + if getattr(args, "sid_pooling_layer", None) == "decoder-las": + speech_decoder_prenet = cls.build_speech_encoder_prenet(args) + else: + speech_decoder_prenet = cls.build_speech_decoder_prenet(speech_odim, args) + + text_decoder_postnet = cls.build_text_decoder_postnet(text_decoder_embed_tokens, task.dicts['text'], args) + speech_decoder_postnet = cls.build_speech_decoder_postnet(speech_odim, args) + + if getattr(args, "sid_t5_postnet", False): + speaker_decoder_postnet = None + else: + if task.t5_task == "s2c": + speaker_decoder_postnet = cls.build_speaker_decoder_postnet(args.sid_embed_dim, len(task.dicts['text']), args) + else: + speaker_decoder_postnet = None + + if "hubert" in task.dicts: + speech_encoder_postnet = cls.build_speech_encoder_postnet(task.dicts['hubert'], args) + else: + speech_encoder_postnet = None + + return cls( + args, + encoder, decoder, + text_encoder_prenet, speech_encoder_prenet, + text_decoder_prenet, speech_decoder_prenet, + text_decoder_postnet, speech_decoder_postnet, + speaker_decoder_postnet, speech_encoder_postnet, + ) + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + # net_output['encoder_out'] is a (B, T, D) tensor + lprobs = self.get_normalized_probs_scriptable(net_output, log_probs, sample) + lprobs.batch_first = True + return lprobs + + def get_normalized_probs_for_ctc(self, net_output, log_probs): + """Get normalized probabilities (or log probs) from a net's output.""" + + logits = net_output["encoder_out_for_ctc"][0] + if log_probs: + return utils.log_softmax(logits.float(), dim=-1) + else: + return utils.softmax(logits.float(), dim=-1) + + def get_logits(self, net_output, is_masked=True): + if is_masked: + logits_list = net_output["logit_m_list"] + else: + logits_list = net_output["logit_u_list"] + logits_list = [x.float() for x in logits_list if x is not None] + return logits_list + + def get_targets(self, sample, net_output, is_masked=True): + if "logit_m_list" in net_output: + logits_list = self.get_logits(net_output, is_masked) + targets_list = [ + x.new_zeros(x.size(0), dtype=torch.long) for x in logits_list + ] + return targets_list + else: + return sample["target"] + + def get_extra_losses(self, net_output): + extra_losses = [] + names = [] + + if "features_pen" in 
net_output: + extra_losses.append(net_output["features_pen"]) + names.append("features_pen") + + if "prob_perplexity" in net_output: + extra_losses.append( + (net_output["num_vars"] - net_output["prob_perplexity"]) + / net_output["num_vars"] + ) + names.append("prob_perplexity") + + return extra_losses, names + + def forward(self, source=None, src_tokens=None, src_lengths=None, prev_output_tokens=None, tgt_lengths=None, spkembs=None, target_list=None, task_name=None, padding_mask=None, only_hubert=False, only_ctc=False, feature_only=False, tgt_enc_layer=None, mask=True): + """ + The forward method inherited from the base class has a **kwargs + argument in its input, which is not supported in torchscript. This + method overwrites the forward method definition without **kwargs. + """ + assert source is not None or src_tokens is not None + # padding_mask is not none only when input is waveform + if source is None and padding_mask is None and not feature_only: + input_type = 'text' + else: + input_type = 'speech' + + if prev_output_tokens is not None and len(prev_output_tokens.size()) == 2: + output_type = 'text' + codebook_out = {} + else: + output_type = 'speech' + + if task_name is not None and task_name == "s2c": + if target_list is not None and target_list.size(1) == 1 and not getattr(self.args, "sid_t5_postnet", False): + sid_target = F.one_hot(target_list.squeeze(1), num_classes=self.speaker_decoder_postnet.class_num) + else: + sid_target = None + target_list = None + + # Encoder Prenet + if input_type == 'text': + encoder_input, encoder_padding_mask = self.text_encoder_prenet(src_tokens) + else: + if target_list is not None: + encoder_input, encoder_padding_mask = self.speech_encoder_prenet(source, require_feat_pen=True, target_list=target_list, padding_mask=padding_mask, mask=mask) + encoder_input, features_pen, mask_indices, target_list = encoder_input + else: + encoder_input, encoder_padding_mask = self.speech_encoder_prenet(source, padding_mask=padding_mask, mask=self.training) + # shuffle a batch of inputs of encoder + if self.training and hasattr(self.args, "sid_shuffle_encoder_input") and getattr(self.args, "sid_shuffle_encoder_input", False): + shuffle_index = torch.randperm(encoder_padding_mask.size(1), device=encoder_padding_mask.device) + encoder_input = torch.index_select(encoder_input, 1, shuffle_index) + encoder_padding_mask = torch.index_select(encoder_padding_mask, 1, shuffle_index) + if getattr(self.args, "sid_encoder_cls", None) == "encoder": + prev_output_tokens = torch.zeros_like(prev_output_tokens) + encoder_input, encoder_padding_mask = self._integrate_with_speaker_cls(prev_output_tokens, encoder_input, encoder_padding_mask) + + # Encoder: T x B x C + encoder_output = self.encoder(encoder_input, encoder_padding_mask, tgt_layer=tgt_enc_layer) + + if task_name is not None and task_name == 'speech_pretrain' and feature_only: + return encoder_output["encoder_out"][0].transpose(0, 1) + + if task_name is not None and task_name == 's2c': + if self.args.sid_pooling_layer == "encoder": + return self.speaker_decoder_postnet(encoder_output["encoder_out"][0].transpose(0, 1).mean(1), sid_target), None + elif self.args.sid_pooling_layer == "encoder-cls": + return self.speaker_decoder_postnet(encoder_output["encoder_out"][0].transpose(0, 1)[:,0], sid_target), None + elif self.args.sid_pooling_layer == "encoder-speaker" or getattr(self.args, "sid_decoder_speaker", False): + return self.speaker_decoder_postnet(encoder_output["encoder_out"][0].transpose(0, 1), sid_target), None + + 
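+ # Note on the block below: when frame-level hidden-unit targets are provided, the encoder states + # are scored by the speech encoder postnet (HuBERT-style masked prediction) and the feature penalty + # is attached to the results. If the Gumbel codebook is enabled, a random subset of time steps + # (a codebook_prob fraction) is then replaced by its quantized representation before decoding, + # i.e. per time step t: out_t = w_t * q_t + (1 - w_t) * enc_t with w_t in {0, 1}, which is what the + # q_w weighting below implements.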
if target_list is not None: + hubert_results = self.hubert_layer( + encoder_output["encoder_out"][0].transpose(0, 1), + encoder_padding_mask, + mask_indices, + target_list + ) + + hubert_results['features_pen'] = features_pen + + if "decoder_input" in encoder_output and encoder_output["decoder_input"][0] is not None: + # Change the encoder output to decoder input once set unb-enc-layer + encoder_output["encoder_out"] = encoder_output["decoder_input"] + + if self.use_codebook: + q = self.quantizer(encoder_output["encoder_out"][0].transpose(0, 1)) + + # q["x"]: B x T x C + # Sample indexs according to the codebook prob + random_idx = torch.randperm(q["x"].size(1))[:int(q["x"].size(1) * self.codebook_prob)] + # Make weight for q + q_w = q["x"].new_zeros(q["x"].size(1)) + q_w[random_idx] = 1.0 + # Combine quantized codes and encoder output + encoder_output["encoder_out"][0] = ( + q_w.view(-1, 1) * q["x"] + (- q_w + 1).view(-1, 1) * encoder_output["encoder_out"][0].transpose(0, 1) + ).transpose(0, 1) + + # encoder_output["encoder_out"][0] = q["x"].transpose(0, 1) + if output_type == 'speech': + hubert_results["prob_perplexity"] = q["prob_perplexity"] + hubert_results["code_perplexity"] = q["code_perplexity"] + hubert_results["num_vars"] = q["num_vars"] + hubert_results["temp"] = q["temp"] + elif output_type == 'text': + codebook_out["prob_perplexity"] = q["prob_perplexity"] + codebook_out["code_perplexity"] = q["code_perplexity"] + codebook_out["num_vars"] = q["num_vars"] + codebook_out["temp"] = q["temp"] + + if only_hubert and target_list is not None: + return hubert_results, None + + if only_ctc and task_name is not None and task_name == "s2t": + return None, encoder_output + elif not self.training and prev_output_tokens is None and task_name == "s2t" and task_name is not None: + return encoder_output + + # Decoder Prenet + if output_type == 'text': + # _ is the incremental state + prev_output_tokens, tgt_mask, _ = self.text_decoder_prenet(prev_output_tokens) + if task_name is not None and task_name == 's2c': + prev_output_tokens = torch.zeros_like(prev_output_tokens) + else: + # integrate speaker embedding + if self.spk_embed_integration_type == "pre" and self.spk_embed_dim is not None: + # Decoder Prenet + prev_output_tokens, tgt_mask = self.speech_decoder_prenet(prev_output_tokens, tgt_lengths, spkembs) + else: + if self.spk_embed_dim is not None: + encoder_output["encoder_out"] = [self._integrate_with_spk_embed( + encoder_output["encoder_out"][0].transpose(0, 1), spkembs + ).transpose(0, 1)] + + prev_output_tokens, tgt_mask = self.speech_decoder_prenet(prev_output_tokens, tgt_lengths) + + # BART Sequence Classification: cat + feature before decoder + if task_name is not None and task_name == 's2c' and self.args.sid_pooling_layer == "decoder-las": + decoder_feat_input, decoder_feat_mask = self.speech_decoder_prenet(src_tokens, src_lengths) + prev_output_tokens, tgt_mask = self._integrate_with_speaker_cls((prev_output_tokens, tgt_mask), decoder_feat_input, decoder_feat_mask, cls_first=False) + + # SE predict masking to corresponding inputs and source speech replaces the prev_output_tokens as the input of decoder + if task_name is not None and task_name == "s2s" and getattr(self.args, "se_decoder_input", "previous_target") == "source": + prev_output_tokens, tgt_mask = self.speech_decoder_prenet(src_tokens, src_lengths) + + # Decoder + decoder_output, extra = self.decoder(prev_output_tokens, tgt_mask, encoder_output, + full_context_alignment=getattr(self.args, 
"decoder_full_context_alignment", False), + alignment_layer=(-1 if target_list is None and output_type == 'speech' else None)) + # Decoder Postnet + if task_name is not None and task_name == 's2c': + if not getattr(self.args, "sid_t5_postnet", False): + if self.args.sid_pooling_layer == "decoder": + return self.speaker_decoder_postnet(decoder_output.mean(1), sid_target), None + elif self.args.sid_pooling_layer == "decoder-las": + indices = (tgt_mask.eq(False).float().sum(1) - 1.0).type(torch.int64) + indices = indices.unsqueeze(1).unsqueeze(2).expand(-1, -1, decoder_output.size(2)) + return self.speaker_decoder_postnet(decoder_output.gather(1, indices), sid_target), None + else: + return (self.text_decoder_postnet(decoder_output), None), encoder_output + + # SE predict: masking, target, delta. Ensure reduction factor 1 + if task_name is not None and task_name == 's2s' and getattr(self.args, "se_predict", None) is not None: + assert self.reduction_factor == 1, f"{self.reduction_factor} != 1" + before_outs, after_outs, logits = self.speech_decoder_postnet(decoder_output) + se_predict = getattr(self.args, "se_predict") + if se_predict == "masking": + before_outs = torch.sigmoid(before_outs) * src_tokens + after_outs = torch.sigmoid(after_outs) * src_tokens + return before_outs, after_outs, logits, extra['attn'][0] + elif se_predict == "target": + return before_outs, after_outs, logits, extra['attn'][0] + elif se_predict == "delta": + before_outs = before_outs - src_tokens + after_outs = after_outs - src_tokens + return before_outs, after_outs, logits, extra['attn'][0] + else: + raise ValueError(f"{se_predict} not in [masking, target, delta]") + + if task_name is not None and task_name == 's2t': + #return self.text_decoder_postnet(decoder_output), None + return (self.text_decoder_postnet(decoder_output), None), encoder_output + if output_type == 'text': + return (self.text_decoder_postnet(decoder_output), None), codebook_out, encoder_output + else: + if target_list is not None: + return hubert_results, (self.speech_decoder_postnet(decoder_output) + (extra['attn'][0],)) + else: + return self.speech_decoder_postnet(decoder_output) + (extra['attn'][0],) + + def _integrate_with_speaker_cls(self, pad_input, encoder_input, encoder_padding_mask=None, cls_first=True): + """ + encoder_input: [B, T, C] + encoder_padding_mask: [B, T] + """ + if hasattr(self, "text_decoder_prenet"): + if isinstance(pad_input, tuple): + repeat_cls_vector, repeat_cls_mask = pad_input + else: + repeat_cls_vector, repeat_cls_mask, _ = self.text_decoder_prenet(pad_input) + + if encoder_padding_mask is not None: + bsz = encoder_input.size(0) + tsz = encoder_input.size(1) + encoder_padding_mask = encoder_input.new_zeros((bsz, tsz)) == 1.0 + if repeat_cls_mask is None: + mask_size = (encoder_padding_mask.size(0), 1) + mask_type = encoder_padding_mask.dtype + repeat_cls_mask = encoder_padding_mask.new_zeros(mask_size) == 1.0 + ret_encoder_padding_mask = torch.cat([repeat_cls_mask, encoder_padding_mask], dim=1) + + if cls_first: + ret_encoder_input = torch.cat([repeat_cls_vector, encoder_input], dim=1) + else: + ret_encoder_input = torch.cat([encoder_input, encoder_input[:,-1:,:]], dim=1) + mask_size = (encoder_padding_mask.size(0), 1) + mask_type = encoder_padding_mask.dtype + repeat_cls_mask_ = encoder_padding_mask.new_ones(mask_size) == 1.0 + encoder_padding_mask_ = torch.cat([encoder_padding_mask, repeat_cls_mask_], dim=1) + indices = encoder_padding_mask.eq(False).float().sum(1).type(torch.int64).unsqueeze(1) + indices_mask = 
torch.zeros_like(ret_encoder_padding_mask).scatter(1, indices, 1.0) + ret_encoder_input = ret_encoder_input * (1.0 - encoder_padding_mask_.type(ret_encoder_input.dtype).unsqueeze(2)) \ + + repeat_cls_vector * indices_mask.type(repeat_cls_vector.dtype).unsqueeze(2) + + return ret_encoder_input, ret_encoder_padding_mask + + def _integrate_with_spk_embed(self, hs, spembs): + """Integrate speaker embedding with hidden states. + Args: + hs (Tensor): Batch of hidden state sequences (B, Tmax, adim). + spembs (Tensor): Batch of speaker embeddings (B, spk_embed_dim). + Returns: + Tensor: Batch of integrated hidden state sequences (B, Tmax, adim) + """ + if self.spk_embed_integration_type == "add": + # apply projection and then add to hidden states + spembs = self.projection(F.normalize(spembs)) + hs = hs + spembs.unsqueeze(1) + elif self.spk_embed_integration_type == "concat": + # concat hidden states with spk embeds and then apply projection + spembs = F.normalize(spembs).unsqueeze(1).expand(-1, hs.size(1), -1) + hs = self.projection(torch.cat([hs, spembs], dim=-1)) + else: + raise NotImplementedError("support only add or concat.") + + return hs + + def load_state_dict( + self, + state_dict, + strict=True, + model_cfg=None, + args=None, + ): + """NOT STRICT Copies parameters and buffers from *state_dict* into this module and + its descendants. + + Overrides the method in :class:`nn.Module`. Compared with that method + this additionally "upgrades" *state_dicts* from old checkpoints. + """ + # self.prune_modules(model_cfg.modules_filter) + model_dict_size = self.text_decoder_postnet.output_projection.out_features + ckpt_dict_size = state_dict["text_decoder_postnet.output_projection.weight"].size(0) + if model_dict_size != ckpt_dict_size: + # reset dictionary-related modules, such as embedding table and encoder ctc embed + logger.warn(f"not equal dictionary between model and checkpoint: {model_dict_size} vs {ckpt_dict_size}") + logger.info(f"reset model dictionary with size of {model_dict_size}") + removed_keys = [ + key for key in state_dict.keys() if any( + key.startswith(previ) for previ in [ + "encoder.proj", "text_encoder_prenet", "text_decoder_prenet", "text_decoder_postnet" + ] + ) + ] + for key in removed_keys: + state_dict.pop(key, None) + logger.info(f"removed loaded checkpoint: {key}") + for m in self._modules.keys(): + m_state_dict = { + key.replace(f"{m}.", ""): value for key, value in state_dict.items() if key.startswith(f"{m}.") + } + if hasattr(self, m): + self._modules[m].load_state_dict(m_state_dict, False) + return self + + def prune_modules(self, modules_filter=None): + """Prune unused modules for specific tasks.""" + if modules_filter is None: + return + elif modules_filter == "s2c": + if hasattr(self, "text_encoder_prenet"): del self.text_encoder_prenet + if hasattr(self, "speech_decoder_prenet") and getattr(self.args, "sid_pooling_layer", None) != "decoder-las": + del self.speech_decoder_prenet + if hasattr(self, "speech_decoder_postnet"): del self.speech_decoder_postnet + if hasattr(self, "text_decoder_postnet"): del self.text_decoder_postnet + if hasattr(self, "speech_encoder_postnet"): del self.speech_encoder_postnet + if hasattr(self.encoder, "proj"): self.encoder.proj = None + if hasattr(self, "projection"): del self.projection + if hasattr(self, "quantizer"): del self.quantizer + if getattr(self.args, "sid_pooling_layer", "decoder").startswith("encoder") or getattr(self.args, "sid_decoder_speaker", False): + if hasattr(self.decoder, "dropout_module"): del 
self.decoder.dropout_module + if hasattr(self.decoder, "layers"): del self.decoder.layers + if hasattr(self.decoder, "layer_norm"): del self.decoder.layer_norm + if hasattr(self, "text_decoder_prenet"): del self.text_decoder_prenet + elif modules_filter == "s2s": + if hasattr(self, "speaker_decoder_postnet"): del self.speaker_decoder_postnet + if hasattr(self, "text_encoder_prenet"): del self.text_encoder_prenet + if hasattr(self, "text_decoder_prenet"): del self.text_decoder_prenet + if hasattr(self, "text_decoder_postnet"): del self.text_decoder_postnet + if hasattr(self, "speech_encoder_postnet"): del self.speech_encoder_postnet + if hasattr(self.encoder, "proj"): self.encoder.proj = None + if hasattr(self, "projection"): del self.projection + if hasattr(self, "quantizer"): del self.quantizer + elif modules_filter == "t2s": + if hasattr(self, "speaker_decoder_postnet"): del self.speaker_decoder_postnet + if hasattr(self, "speech_encoder_prenet"): del self.speech_encoder_prenet + if hasattr(self, "text_decoder_prenet"): del self.text_decoder_prenet + if hasattr(self, "text_decoder_postnet"): del self.text_decoder_postnet + if hasattr(self, "speech_encoder_postnet"): del self.speech_encoder_postnet + if hasattr(self.encoder, "proj"): self.encoder.proj = None + if hasattr(self, "projection"): del self.projection + if hasattr(self, "quantizer"): del self.quantizer + elif modules_filter == "s3prl": + # remain the encoder and the pre/post net + if hasattr(self.decoder, "dropout_module"): del self.decoder.dropout_module + if hasattr(self.decoder, "layers"): del self.decoder.layers + if hasattr(self.decoder, "layer_norm"): del self.decoder.layer_norm + if hasattr(self, "speaker_decoder_postnet"): del self.speaker_decoder_postnet + if hasattr(self, "text_decoder_prenet"): del self.text_decoder_prenet + if hasattr(self, "text_decoder_postnet"): del self.text_decoder_postnet + if hasattr(self, "speech_decoder_prenet"): del self.speech_decoder_prenet + if hasattr(self, "speech_decoder_postnet"): del self.speech_decoder_postnet + if hasattr(self, "speech_encoder_postnet"): del self.speech_encoder_postnet + if hasattr(self.encoder, "proj"): self.encoder.proj = None + if hasattr(self, "projection"): del self.projection + if hasattr(self, "quantizer"): del self.quantizer + + def forward_encoder_torchscript(self, net_input: Dict[str, Tensor]): + """A TorchScript-compatible version of forward. + + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. 
+ """ + if torch.jit.is_scripting(): + return self.forward_encoder( + source=net_input["source"], + padding_mask=net_input["padding_mask"] + ) + else: + return self.forward_encoder_non_torchscript(net_input) + + @torch.jit.unused + def forward_encoder_non_torchscript(self, net_input: Dict[str, Tensor]): + encoder_input = { + k: v for k, v in net_input.items() if k != "prev_output_tokens" and k != "task_name" + } + return self.forward_encoder(**encoder_input) + + def forward_encoder(self, source, padding_mask=None): + # Encoder Prenet + encoder_input, encoder_padding_mask = self.speech_encoder_prenet(source, padding_mask=padding_mask, mask=False) + + # Encoder + encoder_output = self.encoder(encoder_input, encoder_padding_mask) + + return encoder_output + + def forward_text_encoder(self, src_tokens): + # Text Encoder Prenet + encoder_input, encoder_padding_mask = self.text_encoder_prenet(src_tokens) + + # Encoder + encoder_output = self.encoder(encoder_input, encoder_padding_mask) + + return encoder_output + + def forward_decoder(self, tokens, encoder_out, incremental_state): + # Decoder Prenet + prev_output_tokens, tgt_mask, incremental_state = self.text_decoder_prenet(tokens, incremental_state) + + # Decoder + decoder_output, extra = self.decoder( + prev_output_tokens, + tgt_mask, + encoder_out=encoder_out, + incremental_state=incremental_state, + ) + + # Decoder Postnet + return self.text_decoder_postnet(decoder_output), extra + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def generate_class(self, source, prev_output_tokens, **kwargs): + encoder_out = self.forward_encoder(source, padding_mask=kwargs["padding_mask"]) + + prev_output_tokens, tgt_mask, _ = self.text_decoder_prenet(prev_output_tokens, {}) + prev_output_tokens = torch.zeros_like(prev_output_tokens) # s2c use zero vector as [CLS] + + decoder_output, extra = self.decoder( + prev_output_tokens, + tgt_mask, + encoder_out=encoder_out, + ) + + decoder_out, embed = self.speaker_decoder_postnet(decoder_output.mean(1)) + + pred_class = decoder_out.argmax(1) + return pred_class + + def generate_speech(self, source=None, src_tokens=None, spkembs=None, **kwargs): + assert source is not None or src_tokens is not None + + threshold = kwargs.get("threshold", 0.5) + minlenratio = kwargs.get("threshold", 0.0) + + if source is None: + assert src_tokens.size(0) == 1 + encoder_out = self.forward_text_encoder(src_tokens) + maxlenratio = kwargs.get("threshold", 20.0) + else: + assert source.size(0) == 1 + encoder_out = self.forward_encoder(source, padding_mask=kwargs["padding_mask"]) + maxlenratio = kwargs.get("threshold", 10.0) + + if spkembs is not None and self.spk_embed_integration_type != "pre": + encoder_out["encoder_out"] = [self._integrate_with_spk_embed( + encoder_out["encoder_out"][0].transpose(0, 1), spkembs + ).transpose(0, 1)] + spkembs = None + + maxlen = int(encoder_out["encoder_out"][0].size(0) * maxlenratio / self.reduction_factor) + minlen = int(encoder_out["encoder_out"][0].size(0) * minlenratio / self.reduction_factor) + + idx = 0 + ys = encoder_out["encoder_out"][0].new_zeros(1, 1, self.speech_decoder_postnet.odim) + outs, probs = [], [] + + # forward decoder step-by-step + if isinstance(self.decoder, FairseqIncrementalDecoder): + incremental_states = {} + else: + incremental_states = None + attns = [] + while True: + # update index + idx += 1 + # calculate output and stop prob at idx-th step + decoder_in, _ = 
self.speech_decoder_prenet(ys, spkembs=spkembs) + z, extra = self.decoder(decoder_in[:,-1:], None, encoder_out, incremental_states, alignment_layer=-1) + outs += [self.speech_decoder_postnet.feat_out(z[0, -1]).view(self.reduction_factor, self.speech_decoder_postnet.odim)] # [(r, odim), ...] + probs += [torch.sigmoid(self.speech_decoder_postnet.prob_out(z[0, -1]))] # [(r), ...] + + # update next inputs + ys = torch.cat((ys, outs[-1][-1].view(1, 1, self.speech_decoder_postnet.odim)), dim=1) # (1, idx + 1, odim) + attns.append(torch.stack([att_l[0] for att_l in extra['attn'][0]], dim=0)) + # check whether to finish generation + if int(sum(probs[-1] >= threshold)) > 0 or idx >= maxlen: + # check mininum length + if idx < minlen: + continue + outs = (torch.cat(outs, dim=0).unsqueeze(0).transpose(1, 2)) # (L, odim) -> (1, L, odim) -> (1, odim, L) + if self.speech_decoder_postnet.postnet is not None: + outs = outs + self.speech_decoder_postnet.postnet(outs) # (1, odim, L) + outs = outs.transpose(2, 1).squeeze(0) # (L, odim) + probs = torch.cat(probs, dim=0) + attn = torch.cat(attns, dim=2) + break + + if outs.size(0) == maxlen: + logging.warning("output length reaches maximum length") + return outs, probs, attn + + +@register_model_architecture(model_name="t5_transformer", arch_name="t5_transformer") +def base_architecture(args): + # Transformer + args.bert_init = getattr(args, "bert_init", False) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 768) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 768 * 4) + args.encoder_layers = getattr(args, "encoder_layers", 12) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 12) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", args.encoder_embed_dim) + args.decoder_ffn_embed_dim = getattr( + args, "decoder_ffn_embed_dim", args.encoder_ffn_embed_dim + ) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 12) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", args.dropout) + args.activation_dropout = getattr(args, "activation_dropout", args.dropout) + args.activation_fn = getattr(args, "activation_fn", "gelu") + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.0) + args.decoder_output_dim = getattr( + args, "decoder_output_dim", args.decoder_embed_dim + ) + args.decoder_input_dim = getattr(args, "decoder_input_dim", args.decoder_embed_dim) + args.encoder_layerdrop = getattr(args, "encoder_layerdrop", 0) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0) + args.max_text_positions = getattr(args, "max_text_positions", DEFAULT_MAX_TEXT_POSITIONS) + args.max_speech_positions = getattr(args, "max_speech_positions", DEFAULT_MAX_SPEECH_POSITIONS) + + # Espnet related, including prenet, postnet + args.eprenet_conv_layers = getattr(args, "eprenet_conv_layers", 0) + args.eprenet_conv_filts = getattr(args, "eprenet_conv_filts", 0) + args.eprenet_conv_chans = getattr(args, "eprenet_conv_chans", 0) + args.use_batch_norm = getattr(args, "use_batch_norm", True) + args.eprenet_dropout_rate = getattr(args, "eprenet_dropout_rate", 0.0) + args.enc_use_scaled_pos_enc = getattr(args, "enc_use_scaled_pos_enc", True) + args.dec_use_scaled_pos_enc = getattr(args, 
"dec_use_scaled_pos_enc", True) + args.postnet_layers = getattr(args, "postnet_layers", 5) + args.postnet_chans = getattr(args, "postnet_chans", 256) + args.postnet_filts = getattr(args, "postnet_filts", 5) + args.postnet_dropout_rate = getattr(args, "postnet_dropout_rate", 0.5) + args.dprenet_dropout_rate = getattr(args, "dprenet_dropout_rate", 0.5) + args.dprenet_layers = getattr(args, "dprenet_layers", 2) + args.dprenet_units = getattr(args, "dprenet_units", 256) + args.initial_encoder_alpha = getattr(args, "initial_encoder_alpha", 1.0) + args.initial_decoder_alpha = getattr(args, "initial_decoder_alpha", 1.0) + args.spk_embed_integration_type = getattr(args, "spk_embed_integration_type", "pre") + args.spk_embed_dim = getattr(args, "spk_embed_dim", 512) + args.encoder_reduction_factor = getattr(args, "encoder_reduction_factor", 1) + args.reduction_factor = getattr(args, "reduction_factor", 2) + args.transformer_enc_positional_dropout_rate = getattr(args, "transformer_enc_positional_dropout_rate", 0.1) + args.transformer_dec_positional_dropout_rate = getattr(args, "transformer_dec_positional_dropout_rate", 0.1) + args.layer_norm_eps = getattr(args, "layer_norm_eps", 1e-5) + args.no_scale_embedding = getattr(args, "no_scale_embedding", True) + # Convolutional subsampler + args.encoder_speech_prenet = getattr(args, "encoder_speech_prenet", "conv") + args.conv_kernel_sizes = getattr(args, "conv_kernel_sizes", "5,5") + args.conv_channels = getattr(args, "conv_channels", 1024) + args.quant_noise_pq = getattr(args, "quant_noise_pq", 0) + + args.adaptive_softmax_cutoff = getattr(args, "adaptive_softmax_cutoff", None) + args.adaptive_softmax_dropout = getattr(args, "adaptive_softmax_dropout", 0) + args.no_token_positional_embeddings = getattr( + args, "no_token_positional_embeddings", False + ) + args.adaptive_input = getattr(args, "adaptive_input", False) + args.decoder_learned_pos = getattr(args, "decoder_learned_pos", False) + args.share_input_output_embed = getattr(args, "share_input_output_embed", False) + args.share_ctc_embed = getattr(args, "share_ctc_embed", False) + args.freeze_encoder_updates = getattr(args, "freeze_encoder_updates", 0) + args.freeze_decoder_updates = getattr(args, "freeze_decoder_updates", 0) + args.no_freeze_encoder_layer = getattr(args, "no_freeze_encoder_layer", None) + + ## sid + args.sid_embed_dim = getattr(args, "sid_embed_dim", 128) + args.sid_pooling_layer = getattr(args, "sid_pooling_layer", "decoder") + args.softmax_scale = getattr(args, "softmax_scale", 1) + args.softmax_margin = getattr(args, "softmax_margin", 0) + args.softmax_easy_margin = getattr(args, "softmax_easy_margin", False) + args.modules_filter = getattr(args, "modules_filter", None) + + ## Hubert + args.conv_pos = getattr(args, "conv_pos", 128) + args.conv_pos_groups = getattr(args, "conv_pos_groups", 16) + args.target_glu = getattr(args, "target_glu", False) + args.logit_temp = getattr(args, "logit_temp", 0.1) + args.final_dim = getattr(args, "final_dim", 256) + args.untie_final_proj = getattr(args, "untie_final_proj", True) + args.feature_grad_mult = getattr(args, "feature_grad_mult", 0.1) + args.use_sent_enc_layer = getattr(args, "use_sent_enc_layer", True) + # hubert feature extractor + args.extractor_mode = getattr(args, "extractor_mode", "default") + args.conv_feature_layers = getattr(args, "conv_feature_layers", "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2") + args.conv_bias = getattr(args, "conv_bias", False) + # mask + args.hubert_mask_length = getattr(args, "hubert_mask_length", 
10) + args.mask_prob = getattr(args, "mask_prob", 0.0) + args.mask_selection = getattr(args, "mask_selection", "static") + args.mask_other = getattr(args, "mask_other", 0) + args.no_mask_overlap = getattr(args, "no_mask_overlap", False) + args.mask_min_space = getattr(args, "mask_min_space", 1) + # channel mask + args.mask_channel_length = getattr(args, "mask_channel_length", 10) + args.mask_channel_prob = getattr(args, "mask_channel_prob", 0.0) + args.mask_channel_selection = getattr(args, "mask_channel_selection", "static") + args.mask_channel_other = getattr(args, "mask_channel_other", 0) + args.no_mask_channel_overlap = getattr(args, "no_mask_channel_overlap", False) + args.mask_channel_min_space = getattr(args, "mask_channel_min_space", 1) + # loss computation + args.skip_masked = getattr(args, "skip_masked", False) + args.skip_nomask = getattr(args, "skip_nomask", False) + # conv Pos + args.use_conv_pos = getattr(args, "use_conv_pos", False) + args.use_sinc_pos = getattr(args, "use_sinc_pos", False) + + # codebook + args.use_codebook = getattr(args, "use_codebook", False) + args.latent_vars = getattr(args, "latent_vars", 100) + args.latent_groups = getattr(args, "latent_groups", 2) + args.latent_dim = getattr(args, "latent_dim", 0) + args.latent_temp = getattr(args, "latent_temp", (2, 0.5, 0.999995)) + args.quantizer_depth = getattr(args, "quantizer_depth", 1) + args.quantizer_factor = getattr(args, "quantizer_factor", 3) + args.codebook_prob = getattr(args, "codebook_prob", 0.5) + + # Relative pos embed + args.relative_position_embedding = getattr(args, "relative_position_embedding", False) + args.num_buckets = getattr(args, "num_buckets", 320) + args.max_distance = getattr(args, "max_distance", 1280) + args.encoder_max_relative_position = getattr(args, "encoder_max_relative_position", 160) + args.decoder_max_relative_position = getattr(args, "decoder_max_relative_position", 160) + +@register_model_architecture("t5_transformer", "t5_transformer_base") +def t5_transformer_base(args): + args.use_conv_pos = getattr(args, "use_conv_pos", True) + args.use_sinc_pos = getattr(args, "use_sinc_pos", True) + args.layernorm_embedding = getattr(args, "layernorm_embedding", False) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.layer_norm_first = getattr(args, "layer_norm_first", False) + args.relative_position_embedding = getattr(args, "relative_position_embedding", True) + args.dropout = getattr(args, "dropout", 0.1) + args.activation_dropout = getattr(args, "activation_dropout", 0.0) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.encoder_layerdrop = getattr(args, "encoder_layerdrop", 0.05) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.05) + args.mask_prob = getattr(args, "mask_prob", 0.80) + base_architecture(args) + +@register_model_architecture("t5_transformer", "t5_transformer_large") +def t5_transformer_large(args): + args.use_conv_pos = getattr(args, "use_conv_pos", True) + args.use_sinc_pos = getattr(args, "use_sinc_pos", True) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", True) + args.layer_norm_first = getattr(args, "layer_norm_first", True) + args.relative_position_embedding = getattr(args, "relative_position_embedding", True) + args.dropout = getattr(args, "dropout", 0.0) + args.activation_dropout = 
getattr(args, "activation_dropout", 0.0) + args.attention_dropout = getattr(args, "attention_dropout", 0.0) + args.encoder_layerdrop = getattr(args, "encoder_layerdrop", 0.0) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.0) + args.encoder_embed_dim = getattr(args, "encoder_embed_dim", 1024) + args.encoder_layers = getattr(args, "encoder_layers", 24) + args.decoder_layers = getattr(args, "decoder_layers", 6) + args.encoder_ffn_embed_dim = getattr(args, "encoder_ffn_embed_dim", 4096) + args.encoder_attention_heads = getattr(args, "encoder_attention_heads", 16) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.feature_grad_mult = getattr(args, "feature_grad_mult", 1.0) + args.extractor_mode = getattr(args, "extractor_mode", "layer_norm") + args.final_dim = getattr(args, "final_dim", 768) + args.mask_prob = getattr(args, "mask_prob", 0.80) + base_architecture(args) + +@register_model_architecture("t5_transformer", "t5_transformer_base_asr") +def t5_transformer_base_asr(args): + args.use_conv_pos = getattr(args, "use_conv_pos", True) + args.use_sinc_pos = getattr(args, "use_sinc_pos", True) + args.encoder_normalize_before = getattr(args, "encoder_normalize_before", False) + args.decoder_normalize_before = getattr(args, "decoder_normalize_before", False) + args.layer_norm_first = getattr(args, "layer_norm_first", False) + args.relative_position_embedding = getattr(args, "relative_position_embedding", True) + args.dropout = getattr(args, "dropout", 0.1) + args.activation_dropout = getattr(args, "activation_dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.feature_grad_mult = getattr(args, "feature_grad_mult", 0.0) + args.encoder_layerdrop = getattr(args, "encoder_layerdrop", 0.1) + args.decoder_layerdrop = getattr(args, "decoder_layerdrop", 0.1) + args.mask_prob = getattr(args, "mask_prob", 0.75) + args.mask_selection = getattr(args, "mask_selection", "static") + args.mask_channel_length = getattr(args, "mask_channel_length", 64) + args.mask_channel_prob = getattr(args, "mask_channel_prob", 0.5) + args.mask_channel_selection = getattr(args, "mask_channel_selection", "static") + args.max_text_positions = getattr(args, "max_text_positions", 600) + base_architecture(args) diff --git a/SpeechT5/SpeechT5/speecht5/models/t5_transformer_lm.py b/SpeechT5/SpeechT5/speecht5/models/t5_transformer_lm.py new file mode 100644 index 0000000000000000000000000000000000000000..7b20b7f8901e07996c198847fd916f637844ad7a --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/models/t5_transformer_lm.py @@ -0,0 +1,25 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +from fairseq.models import ( + register_model_architecture, +) +from fairseq.models.transformer_lm import base_lm_architecture + + +@register_model_architecture(model_name="transformer_lm", arch_name="transformer_lm_t5") +def transformer_lm_t5(args): + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1280) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 6144) + 
args.decoder_layers = getattr(args, "decoder_layers", 20) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_fn = getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) diff --git a/SpeechT5/SpeechT5/speecht5/sequence_generator.py b/SpeechT5/SpeechT5/speecht5/sequence_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..46fc676ab0e4b6dcddd4e003844bf89d0eccb775 --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/sequence_generator.py @@ -0,0 +1,1080 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import math +from typing import Dict, List, Optional +import sys + +import torch +import torch.nn as nn +from fairseq import search, utils +from fairseq.data import data_utils +from fairseq.models import FairseqIncrementalDecoder +from torch import Tensor +from fairseq.ngram_repeat_block import NGramRepeatBlock +from espnet.nets.ctc_prefix_score import CTCPrefixScore +import numpy + +CTC_SCORING_RATIO = 7.0 + +class SequenceGenerator(nn.Module): + def __init__( + self, + models, + tgt_dict, + beam_size=1, + max_len_a=0, + max_len_b=200, + max_len=0, + min_len=1, + normalize_scores=True, + len_penalty=1.0, + unk_penalty=0.0, + temperature=1.0, + match_source_len=False, + no_repeat_ngram_size=0, + search_strategy=None, + eos=None, + symbols_to_strip_from_output=None, + lm_model=None, + lm_weight=1.0, + ctc_weight=0.0, + ): + """Generates translations of a given source sentence. 
+ + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models, + currently support fairseq.models.TransformerModel for scripting + beam_size (int, optional): beam width (default: 1) + max_len_a/b (int, optional): generate sequences of maximum length + ax + b, where x is the source length + max_len (int, optional): the maximum length of the generated output + (not including end-of-sentence) + min_len (int, optional): the minimum length of the generated output + (not including end-of-sentence) + normalize_scores (bool, optional): normalize scores by the length + of the output (default: True) + len_penalty (float, optional): length penalty, where <1.0 favors + shorter, >1.0 favors longer sentences (default: 1.0) + unk_penalty (float, optional): unknown word penalty, where <0 + produces more unks, >0 produces fewer (default: 0.0) + temperature (float, optional): temperature, where values + >1.0 produce more uniform samples and values <1.0 produce + sharper samples (default: 1.0) + match_source_len (bool, optional): outputs should match the source + length (default: False) + """ + super().__init__() + if isinstance(models, EnsembleModel): + self.model = models + else: + self.model = EnsembleModel(models) + self.tgt_dict = tgt_dict + self.pad = tgt_dict.pad() + self.unk = tgt_dict.unk() + self.eos = tgt_dict.eos() if eos is None else eos + self.blank = self.tgt_dict.index("") + self.mask = self.tgt_dict.index("") + self.mask_idxs = [] + if self.tgt_dict.index("0") != self.unk: + count = 0 + while self.tgt_dict.index("" + str(count)) != self.unk: + self.mask_idxs.append(self.tgt_dict.index("" + str(count))) + count += 1 + self.mask_idxs = torch.tensor(self.mask_idxs) + self.symbols_to_strip_from_output = ( + symbols_to_strip_from_output.union({self.eos}) + if symbols_to_strip_from_output is not None + else {self.eos} + ) + self.vocab_size = len(tgt_dict) + self.beam_size = beam_size + # the max beam size is the dictionary size - 1, since we never select pad + self.beam_size = min(beam_size, self.vocab_size - 1) + self.max_len_a = max_len_a + self.max_len_b = max_len_b + self.min_len = min_len + self.max_len = max_len or self.model.max_decoder_positions() + + self.normalize_scores = normalize_scores + self.len_penalty = len_penalty + self.unk_penalty = unk_penalty + self.temperature = temperature + self.match_source_len = match_source_len + + if no_repeat_ngram_size > 0: + self.repeat_ngram_blocker = NGramRepeatBlock(no_repeat_ngram_size) + else: + self.repeat_ngram_blocker = None + + assert temperature > 0, "--temperature must be greater than 0" + + self.search = ( + search.BeamSearch(tgt_dict) if search_strategy is None else search_strategy + ) + # We only need to set src_lengths in LengthConstrainedBeamSearch. + # As a module attribute, setting it would break in multithread + # settings when the model is shared. + self.should_set_src_lengths = ( + hasattr(self.search, "needs_src_lengths") and self.search.needs_src_lengths + ) + + self.model.eval() + + self.lm_model = lm_model + self.lm_weight = lm_weight + self.ctc_weight = ctc_weight + if self.lm_model is not None: + self.lm_model.eval() + + def cuda(self): + self.model.cuda() + return self + + @torch.no_grad() + def forward( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + """Generate a batch of translations. 
+ + Args: + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, prefix_tokens, bos_token=bos_token) + + # TODO(myleott): unused, deprecate after pytorch-translate migration + def generate_batched_itr(self, data_itr, beam_size=None, cuda=False, timer=None): + """Iterate over a batched dataset and yield individual translations. + Args: + cuda (bool, optional): use GPU for generation + timer (StopwatchMeter, optional): time generations + """ + for sample in data_itr: + s = utils.move_to_cuda(sample) if cuda else sample + if "net_input" not in s: + continue + input = s["net_input"] + # model.forward normally channels prev_output_tokens into the decoder + # separately, but SequenceGenerator directly calls model.encoder + encoder_input = { + k: v for k, v in input.items() if k != "prev_output_tokens" + } + if timer is not None: + timer.start() + with torch.no_grad(): + hypos = self.generate(encoder_input) + if timer is not None: + timer.stop(sum(len(h[0]["tokens"]) for h in hypos)) + for i, id in enumerate(s["id"].data): + # remove padding + src = utils.strip_pad(input["src_tokens"].data[i, :], self.pad) + ref = ( + utils.strip_pad(s["target"].data[i, :], self.pad) + if s["target"] is not None + else None + ) + yield id, src, ref, hypos[i] + + @torch.no_grad() + def generate(self, models, sample: Dict[str, Dict[str, Tensor]], **kwargs): + """Generate translations. Match the api of other fairseq generators. + + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + constraints (torch.LongTensor, optional): force decoder to include + the list of constraints + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, **kwargs) + + def _generate( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + constraints: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + incremental_states = torch.jit.annotate( + List[Dict[str, Dict[str, Optional[Tensor]]]], + [ + torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {}) + for i in range(self.model.models_size) + ], + ) + net_input = sample["net_input"] + + if "src_tokens" in net_input: + src_tokens = net_input["src_tokens"] + # length of the source text being the character length except EndOfSentence and pad + src_lengths = ( + (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1) + ) + elif "source" in net_input: + src_tokens = net_input["source"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + elif "features" in net_input: + src_tokens = net_input["features"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + else: + raise Exception("expected src_tokens or source in net input. input keys: " + str(net_input.keys())) + + # bsz: total number of sentences in beam + # Note that src_tokens may have more than 2 dimensions (i.e. 
audio features) + bsz, src_len = src_tokens.size()[:2] + beam_size = self.beam_size + + if constraints is not None and not self.search.supports_constraints: + raise NotImplementedError( + "Target-side constraints were provided, but search method doesn't support them" + ) + + # Initialize constraints, when active + self.search.init_constraints(constraints, beam_size) + + max_len: int = -1 + if self.match_source_len: + max_len = src_lengths.max().item() + else: + max_len = min( + int(self.max_len_a * src_len + self.max_len_b), + self.max_len - 1, + ) + assert ( + self.min_len <= max_len + ), "min_len cannot be larger than max_len, please adjust these!" + # compute the encoder output for each beam + encoder_outs = self.model.forward_encoder(net_input) + + # Get CTC lprobs and prep ctc_scorer + if self.ctc_weight > 0: + ctc_lprobs = self.model.models[0].get_normalized_probs_for_ctc( + encoder_outs[0], log_probs=True + ).contiguous().transpose(0, 1) # (B, T, C) from the encoder + + hyp = {} + ctc_prefix_score = CTCPrefixScore(ctc_lprobs[0].detach().cpu().numpy(), self.blank, self.eos, numpy) + hyp["ctc_state_prev"] = ctc_prefix_score.initial_state() + hyp["ctc_score_prev"] = 0.0 + ctc_beam = min(ctc_lprobs.shape[-1] - self.mask_idxs.size(-1), int(beam_size * CTC_SCORING_RATIO)) + ctc_hyps = {str(self.eos): hyp} + + # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores + new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1) + new_order = new_order.to(src_tokens.device).long() + encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order) + # ensure encoder_outs is a List. + assert encoder_outs is not None + + # initialize buffers + scores = ( + torch.zeros(bsz * beam_size, max_len + 1).to(src_tokens).float() + ) # +1 for eos; pad is never chosen for scoring + tokens = ( + torch.zeros(bsz * beam_size, max_len + 2) + .to(src_tokens) + .long() + .fill_(self.pad) + ) # +2 for eos and pad + tokens[:, 0] = self.eos if bos_token is None else bos_token + attn: Optional[Tensor] = None + + # A list that indicates candidates that should be ignored. + # For example, suppose we're sampling and have already finalized 2/5 + # samples. Then cands_to_ignore would mark 2 positions as being ignored, + # so that we only finalize the remaining 3 samples. 
+ cands_to_ignore = ( + torch.zeros(bsz, beam_size).to(src_tokens).eq(-1) + ) # forward and backward-compatible False mask + + # list of completed sentences + finalized = torch.jit.annotate( + List[List[Dict[str, Tensor]]], + [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)], + ) # contains lists of dictionaries of infomation about the hypothesis being finalized at each step + + # a boolean array indicating if the sentence at the index is finished or not + finished = [False for i in range(bsz)] + num_remaining_sent = bsz # number of sentences remaining + + # number of candidate hypos per step + cand_size = 2 * beam_size # 2 x beam size in case half are EOS + + # offset arrays for converting between different indexing schemes + bbsz_offsets = ( + (torch.arange(0, bsz) * beam_size) + .unsqueeze(1) + .type_as(tokens) + .to(src_tokens.device) + ) + cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_tokens.device) + + reorder_state: Optional[Tensor] = None + ctc_state = None + batch_idxs: Optional[Tensor] = None + + original_batch_idxs: Optional[Tensor] = None + if "id" in sample and isinstance(sample["id"], Tensor): + original_batch_idxs = sample["id"] + else: + original_batch_idxs = torch.arange(0, bsz).type_as(tokens) + + for step in range(max_len + 1): # one extra step for EOS marker + # reorder decoder internal states based on the prev choice of beams + if reorder_state is not None: + if batch_idxs is not None: + # update beam indices to take into account removed sentences + corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as( + batch_idxs + ) + reorder_state.view(-1, beam_size).add_( + corr.unsqueeze(-1) * beam_size + ) + original_batch_idxs = original_batch_idxs[batch_idxs] + self.model.reorder_incremental_state(incremental_states, reorder_state) + encoder_outs = self.model.reorder_encoder_out( + encoder_outs, reorder_state + ) + + lprobs, avg_attn_scores = self.model.forward_decoder( + tokens[:, : step + 1], + encoder_outs, + incremental_states, + self.temperature, + ) + + if self.ctc_weight > 0 and step != 0: + # lprobs[:, self.blank] = -math.inf # never select blank + ctc_lprobs = lprobs.clone() + ctc_lprobs[:, self.blank] = -math.inf # never select blank + if self.mask != self.unk: + ctc_lprobs[:, self.mask] = -math.inf # never select mask + if self.mask_idxs.size(0) != 0: + ctc_lprobs[:, self.mask_idxs] = -math.inf # never select mask + local_best_scores, local_best_ids = torch.topk(ctc_lprobs, ctc_beam, dim=-1) + for b in range(tokens.size(0)): + hyp_key = " ".join(str(x) for x in tokens[b, : step + 1].tolist()) + ctc_scores, ctc_states = ctc_prefix_score( + tokens[b, : step + 1].cpu(), local_best_ids[b].cpu(), ctc_hyps[hyp_key]["ctc_state_prev"] + ) + lprobs[b] = lprobs[b] + lprobs[b, local_best_ids[b]] = (1 - self.ctc_weight) * (lprobs[b, local_best_ids[b]]) + self.ctc_weight * torch.from_numpy( + ctc_scores - ctc_hyps[hyp_key]["ctc_score_prev"] + ).to(device="cuda") + for j in range(len(local_best_ids[b])): + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())] = {} + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_score_prev"] = ctc_scores[j] + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_state_prev"] = ctc_states[j] + + # local_ctc_scores, ctc_state = ctc_scorer( + # tokens[:, : step + 1], ctc_state, part_ids + # ) + # lprobs += local_ctc_scores * self.ctc_weight + elif self.ctc_weight > 0 and step == 0: + ctc_lprobs = lprobs.clone() + ctc_lprobs[:, self.blank] = -math.inf # never select blank 
+ if self.mask != self.unk: + ctc_lprobs[:, self.mask] = -math.inf # never select mask + if self.mask_idxs.size(0) != 0: + ctc_lprobs[:, self.mask_idxs] = -math.inf # never select mask + local_best_scores, local_best_ids = torch.topk(ctc_lprobs, ctc_beam, dim=-1) + for b in range(tokens.size(0)): + hyp_key = " ".join(str(x) for x in tokens[b, : step + 1].tolist()) + ctc_scores, ctc_states = ctc_prefix_score( + tokens[b, : step + 1].cpu(), local_best_ids[b].cpu(), ctc_hyps[hyp_key]["ctc_state_prev"] + ) + lprobs[b] = lprobs[b] + lprobs[b, local_best_ids[b]] = (1 - self.ctc_weight) * (lprobs[b, local_best_ids[b]]) + self.ctc_weight * torch.from_numpy( + ctc_scores - ctc_hyps[hyp_key]["ctc_score_prev"] + ).to(device="cuda") + for j in range(len(local_best_ids[b])): + if b == 0: + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())] = {} + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_score_prev"] = ctc_scores[j] + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_state_prev"] = ctc_states[j] + + if self.lm_model is not None: + lm_out = self.lm_model(tokens[:, : step + 1]) + probs = self.lm_model.get_normalized_probs( + lm_out, log_probs=True, sample=None + ) + probs = probs[:, -1, :] * self.lm_weight + lprobs[:, :probs.size(1)] += probs + + # handle prefix tokens (possibly with different lengths) + if ( + prefix_tokens is not None + and step < prefix_tokens.size(1) + and step < max_len + ): + lprobs, tokens, scores = self._prefix_tokens( + step, lprobs, scores, tokens, prefix_tokens, beam_size + ) + elif step < self.min_len: + # minimum length constraint (does not apply if using prefix_tokens) + lprobs[:, self.eos] = -math.inf + + lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs) + + lprobs[:, self.pad] = -math.inf # never select pad + lprobs[:, self.unk] -= self.unk_penalty # apply unk penalty + lprobs[:, self.blank] = -math.inf # never select blank + if self.mask != self.unk: + lprobs[:, self.mask] = -math.inf # never select mask + if self.mask_idxs.size(0) != 0: + lprobs[:, self.mask_idxs] = -math.inf # never select mask + + # handle max length constraint + if step >= max_len: + lprobs[:, : self.eos] = -math.inf + lprobs[:, self.eos + 1 :] = -math.inf + + # Record attention scores, only support avg_attn_scores is a Tensor + if avg_attn_scores is not None: + if attn is None: + attn = torch.empty( + bsz * beam_size, avg_attn_scores.size(1), max_len + 2 + ).to(scores) + attn[:, :, step + 1].copy_(avg_attn_scores) + + scores = scores.type_as(lprobs) + eos_bbsz_idx = torch.empty(0).to( + tokens + ) # indices of hypothesis ending with eos (finished sentences) + eos_scores = torch.empty(0).to( + scores + ) # scores of hypothesis ending with eos (finished sentences) + + if self.should_set_src_lengths: + self.search.set_src_lengths(src_lengths) + + if self.repeat_ngram_blocker is not None: + lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz, beam_size, step) + + # Shape: (batch, cand_size) + cand_scores, cand_indices, cand_beams = self.search.step( + step, + lprobs.view(bsz, -1, self.vocab_size), + scores.view(bsz, beam_size, -1)[:, :, :step], + tokens[:, : step + 1], + original_batch_idxs, + ) + + # cand_bbsz_idx contains beam indices for the top candidate + # hypotheses, with a range of values: [0, bsz*beam_size), + # and dimensions: [bsz, cand_size] + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + + # finalize hypotheses that end in eos + # Shape of eos_mask: (batch size, beam size) + eos_mask = cand_indices.eq(self.eos) & 
cand_scores.ne(-math.inf) + eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask) + + # only consider eos when it's among the top beam_size indices + # Now we know what beam item(s) to finish + # Shape: 1d list of absolute-numbered + eos_bbsz_idx = torch.masked_select( + cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents: List[int] = [] + if eos_bbsz_idx.numel() > 0: + eos_scores = torch.masked_select( + cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents = self.finalize_hypos( + step, + eos_bbsz_idx, + eos_scores, + tokens, + scores, + finalized, + finished, + beam_size, + attn, + src_lengths, + max_len, + ) + num_remaining_sent -= len(finalized_sents) + + assert num_remaining_sent >= 0 + if num_remaining_sent == 0: + break + if self.search.stop_on_max_len and step >= max_len: + break + assert step < max_len, f"{step} < {max_len}" + + # Remove finalized sentences (ones for which {beam_size} + # finished hypotheses have been generated) from the batch. + if len(finalized_sents) > 0: + new_bsz = bsz - len(finalized_sents) + + # construct batch_idxs which holds indices of batches to keep for the next pass + batch_mask = torch.ones( + bsz, dtype=torch.bool, device=cand_indices.device + ) + batch_mask[finalized_sents] = False + # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it + batch_idxs = torch.arange( + bsz, device=cand_indices.device + ).masked_select(batch_mask) + + # Choose the subset of the hypothesized constraints that will continue + self.search.prune_sentences(batch_idxs) + + eos_mask = eos_mask[batch_idxs] + cand_beams = cand_beams[batch_idxs] + bbsz_offsets.resize_(new_bsz, 1) + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + cand_scores = cand_scores[batch_idxs] + cand_indices = cand_indices[batch_idxs] + + if prefix_tokens is not None: + prefix_tokens = prefix_tokens[batch_idxs] + src_lengths = src_lengths[batch_idxs] + cands_to_ignore = cands_to_ignore[batch_idxs] + + scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + if attn is not None: + attn = attn.view(bsz, -1)[batch_idxs].view( + new_bsz * beam_size, attn.size(1), -1 + ) + bsz = new_bsz + else: + batch_idxs = None + + # Set active_mask so that values > cand_size indicate eos hypos + # and values < cand_size indicate candidate active hypos. + # After, the min values per row are the top candidate active hypos + + # Rewrite the operator since the element wise or is not supported in torchscript. + + eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size])) + active_mask = torch.add( + eos_mask.type_as(cand_offsets) * cand_size, + cand_offsets[: eos_mask.size(1)], + ) + + # get the top beam_size active hypotheses, which are just + # the hypos with the smallest values in active_mask. + # {active_hypos} indicates which {beam_size} hypotheses + # from the list of {2 * beam_size} candidates were + # selected. Shapes: (batch size, beam size) + new_cands_to_ignore, active_hypos = torch.topk( + active_mask, k=beam_size, dim=1, largest=False + ) + + # update cands_to_ignore to ignore any finalized hypos. + cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size] + # Make sure there is at least one active item for each sentence in the batch. 
+ assert (~cands_to_ignore).any(dim=1).all() + + # update cands_to_ignore to ignore any finalized hypos + + # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam + # can be selected more than once). + active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos) + active_scores = torch.gather(cand_scores, dim=1, index=active_hypos) + + active_bbsz_idx = active_bbsz_idx.view(-1) + active_scores = active_scores.view(-1) + + # copy tokens and scores for active hypotheses + + # Set the tokens for each beam (can select the same row more than once) + tokens[:, : step + 1] = torch.index_select( + tokens[:, : step + 1], dim=0, index=active_bbsz_idx + ) + # Select the next token for each of them + tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather( + cand_indices, dim=1, index=active_hypos + ) + if step > 0: + scores[:, :step] = torch.index_select( + scores[:, :step], dim=0, index=active_bbsz_idx + ) + scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather( + cand_scores, dim=1, index=active_hypos + ) + + # Update constraints based on which candidates were selected for the next beam + self.search.update_constraints(active_hypos) + + # copy attention for active hypotheses + if attn is not None: + attn[:, :, : step + 2] = torch.index_select( + attn[:, :, : step + 2], dim=0, index=active_bbsz_idx + ) + + # reorder incremental state in decoder + reorder_state = active_bbsz_idx + + # if self.ctc_weight > 0: + # accum_best_id = torch.gather(cand_indices, dim=1, index=active_hypos) + # ctc_state = ctc_scorer.index_select_state( + # ctc_state, accum_best_id + # ) + + # sort by score descending + for sent in range(len(finalized)): + scores = torch.tensor( + [float(elem["score"].item()) for elem in finalized[sent]] + ) + _, sorted_scores_indices = torch.sort(scores, descending=True) + finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices] + finalized[sent] = torch.jit.annotate( + List[Dict[str, Tensor]], finalized[sent] + ) + return finalized + + def _prefix_tokens( + self, step: int, lprobs, scores, tokens, prefix_tokens, beam_size: int + ): + """Handle prefix tokens""" + prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat(1, beam_size).view(-1) + prefix_lprobs = lprobs.gather(-1, prefix_toks.unsqueeze(-1)) + prefix_mask = prefix_toks.ne(self.pad) + lprobs[prefix_mask] = torch.min(prefix_lprobs) - 1 + lprobs[prefix_mask] = lprobs[prefix_mask].scatter( + -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_lprobs[prefix_mask] + ) + # if prefix includes eos, then we should make sure tokens and + # scores are the same across all beams + eos_mask = prefix_toks.eq(self.eos) + if eos_mask.any(): + # validate that the first beam matches the prefix + first_beam = tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[ + :, 0, 1 : step + 1 + ] + eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0] + target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step] + assert (first_beam == target_prefix).all() + + # copy tokens, scores and lprobs from the first beam to all beams + tokens = self.replicate_first_beam(tokens, eos_mask_batch_dim, beam_size) + scores = self.replicate_first_beam(scores, eos_mask_batch_dim, beam_size) + lprobs = self.replicate_first_beam(lprobs, eos_mask_batch_dim, beam_size) + return lprobs, tokens, scores + + def replicate_first_beam(self, tensor, mask, beam_size: int): + tensor = tensor.view(-1, beam_size, tensor.size(-1)) + tensor[mask] = tensor[mask][:, :1, :] + return tensor.view(-1, tensor.size(-1)) + + 
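# Illustrative sketch (standalone example, not part of the original file): the
# replicate_first_beam() helper above collapses every beam of a masked sentence
# onto that sentence's first beam. The same view/mask/overwrite pattern on a toy
# tensor, assuming a hypothetical bsz=2 and beam_size=2:
#
#     import torch
#     beam_size = 2
#     t = torch.arange(12).view(-1, 3)         # (bsz * beam_size, cols) = (4, 3)
#     sent_mask = torch.tensor([True, False])  # replicate only for sentence 0
#     t = t.view(-1, beam_size, t.size(-1))    # (bsz, beam_size, cols)
#     t[sent_mask] = t[sent_mask][:, :1, :]    # beam 0 overwrites the other beams
#     t = t.view(-1, t.size(-1))
#     # rows 0 and 1 are now identical; sentence 1 (rows 2 and 3) is unchanged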
def finalize_hypos( + self, + step: int, + bbsz_idx, + eos_scores, + tokens, + scores, + finalized: List[List[Dict[str, Tensor]]], + finished: List[bool], + beam_size: int, + attn: Optional[Tensor], + src_lengths, + max_len: int, + ): + """Finalize hypothesis, store finalized information in `finalized`, and change `finished` accordingly. + A sentence is finalized when {beam_size} finished items have been collected for it. + Returns number of sentences (not beam items) being finalized. + These will be removed from the batch and not processed further. + Args: + bbsz_idx (Tensor): + """ + assert bbsz_idx.numel() == eos_scores.numel() + + # clone relevant token and attention tensors. + # tokens is (batch * beam, max_len). So the index_select + # gets the newly EOS rows, then selects cols 1..{step + 2} + tokens_clone = tokens.index_select(0, bbsz_idx)[ + :, 1 : step + 2 + ] # skip the first index, which is EOS + + tokens_clone[:, step] = self.eos + attn_clone = ( + attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2] + if attn is not None + else None + ) + + # compute scores per token position + pos_scores = scores.index_select(0, bbsz_idx)[:, : step + 1] + pos_scores[:, step] = eos_scores + # convert from cumulative to per-position scores + pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] + + # normalize sentence-level scores + if self.normalize_scores: + eos_scores /= (step + 1) ** self.len_penalty + + # cum_unfin records which sentences in the batch are finished. + # It helps match indexing between (a) the original sentences + # in the batch and (b) the current, possibly-reduced set of + # sentences. + cum_unfin: List[int] = [] + prev = 0 + for f in finished: + if f: + prev += 1 + else: + cum_unfin.append(prev) + cum_fin_tensor = torch.tensor(cum_unfin, dtype=torch.int).to(bbsz_idx) + + unfin_idx = bbsz_idx // beam_size + sent = unfin_idx + torch.index_select(cum_fin_tensor, 0, unfin_idx) + + # Create a set of "{sent}{unfin_idx}", where + # "unfin_idx" is the index in the current (possibly reduced) + # list of sentences, and "sent" is the index in the original, + # unreduced batch + # For every finished beam item + # sentence index in the current (possibly reduced) batch + seen = (sent << 32) + unfin_idx + unique_seen: List[int] = torch.unique(seen).tolist() + + if self.match_source_len: + condition = step > torch.index_select(src_lengths, 0, unfin_idx) + eos_scores = torch.where(condition, torch.tensor(-math.inf), eos_scores) + sent_list: List[int] = sent.tolist() + for i in range(bbsz_idx.size()[0]): + # An input sentence (among those in a batch) is finished when + # beam_size hypotheses have been collected for it + if len(finalized[sent_list[i]]) < beam_size: + if attn_clone is not None: + # remove padding tokens from attn scores + hypo_attn = attn_clone[i] + else: + hypo_attn = torch.empty(0) + + finalized[sent_list[i]].append( + { + "tokens": tokens_clone[i], + "score": eos_scores[i], + "attention": hypo_attn, # src_len x tgt_len + "alignment": torch.empty(0), + "positional_scores": pos_scores[i], + } + ) + + newly_finished: List[int] = [] + for unique_s in unique_seen: + # check termination conditions for this sentence + unique_sent: int = unique_s >> 32 + unique_unfin_idx: int = unique_s - (unique_sent << 32) + + if not finished[unique_sent] and self.is_finished( + step, unique_unfin_idx, max_len, len(finalized[unique_sent]), beam_size + ): + finished[unique_sent] = True + newly_finished.append(unique_unfin_idx) + + return newly_finished + + def is_finished( + self, + step: 
int, + unfin_idx: int, + max_len: int, + finalized_sent_len: int, + beam_size: int, + ): + """ + Check whether decoding for a sentence is finished, which + occurs when the list of finalized sentences has reached the + beam size, or when we reach the maximum length. + """ + assert finalized_sent_len <= beam_size + if finalized_sent_len == beam_size or step == max_len: + return True + return False + + +class EnsembleModel(nn.Module): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__() + self.models_size = len(models) + # method '__len__' is not supported in ModuleList for torch script + self.single_model = models[0] + self.models = nn.ModuleList(models) + + self.has_incremental: bool = False + if all( + hasattr(m, "decoder") and isinstance(m.decoder, FairseqIncrementalDecoder) + for m in models + ): + self.has_incremental = True + + def forward(self): + pass + + def has_encoder(self): + return hasattr(self.single_model, "encoder") + + def is_t5_structure(self): + t5_structure = hasattr(self.single_model, "text_encoder_prenet") and hasattr(self.single_model, "speech_encoder_prenet") or \ + hasattr(self.single_model, "encoder_prenet") and hasattr(self.single_model, "encoder_prenet") + return t5_structure + + def has_incremental_states(self): + return self.has_incremental + + def max_decoder_positions(self): + return min([m.max_decoder_positions() for m in self.models if hasattr(m, "max_decoder_positions")] + [sys.maxsize]) + + @torch.jit.export + def forward_encoder(self, net_input: Dict[str, Tensor]): + if not self.has_encoder(): + return None + elif self.is_t5_structure(): + return [model.forward_encoder_torchscript(net_input) for model in self.models] + else: + return [model.encoder.forward_torchscript(net_input) for model in self.models] + + @torch.jit.export + def forward_decoder( + self, + tokens, + encoder_outs: List[Dict[str, List[Tensor]]], + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + temperature: float = 1.0, + ): + log_probs = [] + avg_attn: Optional[Tensor] = None + encoder_out: Optional[Dict[str, List[Tensor]]] = None + for i, model in enumerate(self.models): + if self.has_encoder(): + encoder_out = encoder_outs[i] + # decode each model + if self.has_incremental_states(): + if self.is_t5_structure: + decoder_out = model.forward_decoder( + tokens, + encoder_out=encoder_out, + incremental_state=incremental_states[i] + ) + else: + decoder_out = model.decoder.forward( + tokens, + encoder_out=encoder_out, + incremental_state=incremental_states[i], + ) + else: + if hasattr(model, "decoder"): + decoder_out = model.decoder.forward(tokens, encoder_out=encoder_out) + else: + decoder_out = model.forward(tokens) + + attn: Optional[Tensor] = None + decoder_len = len(decoder_out) + if decoder_len > 1 and decoder_out[1] is not None: + if isinstance(decoder_out[1], Tensor): + attn = decoder_out[1] + else: + attn_holder = decoder_out[1]["attn"] + if isinstance(attn_holder, Tensor): + attn = attn_holder + elif attn_holder is not None: + attn = attn_holder[0] + if attn is not None: + attn = attn[:, -1, :] + + decoder_out_tuple = ( + decoder_out[0][:, -1:, :].div_(temperature), + None if decoder_len <= 1 else decoder_out[1], + ) + probs = model.get_normalized_probs( + decoder_out_tuple, log_probs=True, sample=None + ) + probs = probs[:, -1, :] + if self.models_size == 1: + return probs, attn + + log_probs.append(probs) + if attn is not None: + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + + avg_probs = 
torch.logsumexp(torch.stack(log_probs, dim=0), dim=0) - math.log( + self.models_size + ) + + if avg_attn is not None: + avg_attn.div_(self.models_size) + return avg_probs, avg_attn + + @torch.jit.export + def reorder_encoder_out( + self, encoder_outs: Optional[List[Dict[str, List[Tensor]]]], new_order + ): + """ + Reorder encoder output according to *new_order*. + + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + new_outs: List[Dict[str, List[Tensor]]] = [] + if not self.has_encoder(): + return new_outs + for i, model in enumerate(self.models): + assert encoder_outs is not None + new_outs.append( + model.encoder.reorder_encoder_out(encoder_outs[i], new_order) + ) + return new_outs + + @torch.jit.export + def reorder_incremental_state( + self, + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + new_order, + ): + if not self.has_incremental_states(): + return + for i, model in enumerate(self.models): + model.decoder.reorder_incremental_state_scripting( + incremental_states[i], new_order + ) + + +class SequenceGeneratorWithAlignment(SequenceGenerator): + def __init__( + self, models, tgt_dict, left_pad_target=False, print_alignment="hard", **kwargs + ): + """Generates translations of a given source sentence. + + Produces alignments following "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + left_pad_target (bool, optional): Whether or not the + hypothesis should be left padded or not when they are + teacher forced for generating alignments. + """ + super().__init__(EnsembleModelWithAlignment(models), tgt_dict, **kwargs) + self.left_pad_target = left_pad_target + + if print_alignment == "hard": + self.extract_alignment = utils.extract_hard_alignment + elif print_alignment == "soft": + self.extract_alignment = utils.extract_soft_alignment + + @torch.no_grad() + def generate(self, models, sample, **kwargs): + finalized = super()._generate(sample, **kwargs) + + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + beam_size = self.beam_size + ( + src_tokens, + src_lengths, + prev_output_tokens, + tgt_tokens, + ) = self._prepare_batch_for_alignment(sample, finalized) + if any(getattr(m, "full_context_alignment", False) for m in self.model.models): + attn = self.model.forward_align(src_tokens, src_lengths, prev_output_tokens) + else: + attn = [ + finalized[i // beam_size][i % beam_size]["attention"].transpose(1, 0) + for i in range(bsz * beam_size) + ] + + if src_tokens.device != "cpu": + src_tokens = src_tokens.to("cpu") + tgt_tokens = tgt_tokens.to("cpu") + attn = [i.to("cpu") for i in attn] + + # Process the attn matrix to extract hard alignments. 
+ for i in range(bsz * beam_size): + alignment = self.extract_alignment( + attn[i], src_tokens[i], tgt_tokens[i], self.pad, self.eos + ) + finalized[i // beam_size][i % beam_size]["alignment"] = alignment + return finalized + + def _prepare_batch_for_alignment(self, sample, hypothesis): + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + src_tokens = ( + src_tokens[:, None, :] + .expand(-1, self.beam_size, -1) + .contiguous() + .view(bsz * self.beam_size, -1) + ) + src_lengths = sample["net_input"]["src_lengths"] + src_lengths = ( + src_lengths[:, None] + .expand(-1, self.beam_size) + .contiguous() + .view(bsz * self.beam_size) + ) + prev_output_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=True, + ) + tgt_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=False, + ) + return src_tokens, src_lengths, prev_output_tokens, tgt_tokens + + +class EnsembleModelWithAlignment(EnsembleModel): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__(models) + + def forward_align(self, src_tokens, src_lengths, prev_output_tokens): + avg_attn = None + for model in self.models: + decoder_out = model(src_tokens, src_lengths, prev_output_tokens) + attn = decoder_out[1]["attn"][0] + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + if len(self.models) > 1: + avg_attn.div_(len(self.models)) + return avg_attn diff --git a/SpeechT5/SpeechT5/speecht5/tasks/__init__.py b/SpeechT5/SpeechT5/speecht5/tasks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SpeechT5/SpeechT5/speecht5/tasks/__pycache__/__init__.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/tasks/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f14bf3959df35a0d70ba86dc6ac25d9ac6cf93a2 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/tasks/__pycache__/__init__.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/tasks/__pycache__/speecht5.cpython-38.pyc b/SpeechT5/SpeechT5/speecht5/tasks/__pycache__/speecht5.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65cacd6c908acd2024c1c0ab91255f60b16442c5 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5/tasks/__pycache__/speecht5.cpython-38.pyc differ diff --git a/SpeechT5/SpeechT5/speecht5/tasks/speecht5.py b/SpeechT5/SpeechT5/speecht5/tasks/speecht5.py new file mode 100644 index 0000000000000000000000000000000000000000..ff3c88eb6bd6736608b17f8faa200814f65956ae --- /dev/null +++ b/SpeechT5/SpeechT5/speecht5/tasks/speecht5.py @@ -0,0 +1,700 @@ +# -------------------------------------------------------- +# SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing (https://arxiv.org/abs/2110.07205) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechT5 +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq and espnet code bases +# https://github.com/pytorch/fairseq; https://github.com/espnet/espnet +# -------------------------------------------------------- + +import logging +import os.path as op +from argparse import Namespace +from collections import OrderedDict + +import torch 
+from fairseq.data import ( + Dictionary, + encoders, + PrependTokenDataset, + AppendTokenDataset, + data_utils, + StripTokenDataset, + TokenBlockDataset, +) +from fairseq.data.encoders.utils import get_whole_word_mask +from fairseq import utils +from speecht5.data.multitask_dataset import MultitaskDataset +from speecht5.data.speech_to_text_dataset import SpeechToTextDataset +from speecht5.data.text_to_speech_dataset import TextToSpeechDataset +from speecht5.data.speech_to_speech_dataset import SpeechToSpeechDataset +from speecht5.data.speech_to_class_dataset import SpeechToClassDataset +from speecht5.data.speech_dataset import SpeechPretrainDataset +from speecht5.data.text_dataset import TextPretrainDataset +from fairseq.data.shorten_dataset import maybe_shorten_dataset +from fairseq.tasks import LegacyFairseqTask, register_task +from fairseq.tasks.hubert_pretraining import LabelEncoder + +logger = logging.getLogger(__name__) + +TASK_NAME = ["s2t", "t2s", "s2s", "s2c", "pretrain"] + +@register_task("speecht5") +class SpeechT5Task(LegacyFairseqTask): + @staticmethod + def add_args(parser): + parser.add_argument("data", help="manifest root path") + parser.add_argument( + "--config-yaml", + type=str, + default="config.yaml", + help="Configuration YAML filename (under manifest root)", + ) + parser.add_argument( + "--max-speech-sample-size", + default=None, + type=int, + metavar="N", + help="max speech sample size", + ) + parser.add_argument( + "--min-speech-sample-size", + default=None, + type=int, + metavar="N", + help="min speech sample size", + ) + parser.add_argument( + "--max-speech-positions", + default=4000, + type=int, + metavar="N", + help="max number of tokens in the source sequence", + ) + parser.add_argument( + "--max-text-positions", + default=450, + type=int, + metavar="N", + help="max number of tokens in the target sequence", + ) + parser.add_argument( + '--t5-task', + choices=TASK_NAME, + help='task for training' + ) + parser.add_argument( + "--bpe-tokenizer", + type=str, + default=None, + help="bpe tokenizer for s2t", + ) + # Speaker Identification (SID) + parser.add_argument( + "--finetune-from-modules", + default=None, + # choices=[ + # "encoder-decoder", "encoder", "decoder", + # "speech_encoder_prenet-encoder-decoder-text_decoder_prenet-text_decoder_postnet", # ASR, T5 SID + # "speech_encoder_prenet-encoder-decoder-text_decoder_prenet-speaker_decoder_postnet", # SID + # "speech_encoder_prenet-encoder-decoder-speech_decoder_prenet-speech_decoder_postnet", # VC, SE + # "text_encoder_prenet-encoder-decoder-speech_decoder_prenet-speech_decoder_postnet", # TTS + # ], + help="If set, using part modules of finetune model.", + ) + parser.add_argument( + "--finetune-out-of-modules", + default=None, + # choices=[ + # "speaker_decoder_postnet", # SID + # "speech_decoder_postnet", # SE with reduction factor 1 + # ], + help="If set, remove part modules of finetune model.", + ) + # BART + parser.add_argument( + "--shorten-method", + default="none", + choices=["none", "truncate", "random_crop"], + help="if not none, shorten sequences that exceed --tokens-per-sample", + ) + parser.add_argument( + "--shorten-data-split-list", + default="", + help="comma-separated list of dataset splits to apply shortening to, " + 'e.g., "train,valid" (default: all dataset splits)', + ) + + parser.add_argument( + "--tokens-per-sample", + default=512, + type=int, + help="max number of total tokens over all segments" + " per sample for dataset", + ) + parser.add_argument( + "--sample-break-mode", + 
default="eos", + type=str, + help="mode for breaking sentence", + ) + parser.add_argument( + "--mask", + default=0.3, + type=float, + help="fraction of words/subwords that will be masked", + ) + parser.add_argument( + "--mask-random", + default=0.1, + type=float, + help="instead of using [MASK], use random token this often", + ) + parser.add_argument( + "--insert", + default=0.0, + type=float, + help="insert this percentage of additional random tokens", + ) + parser.add_argument( + "--permute", + default=0.0, + type=float, + help="take this proportion of subwords and permute them", + ) + parser.add_argument( + "--rotate", + default=0.0, + type=float, + help="rotate this proportion of inputs", + ) + parser.add_argument( + "--poisson-lambda", + default=3.5, + type=float, + help="randomly shuffle sentences for this proportion of inputs", + ) + parser.add_argument( + "--permute-sentences", + default=0.0, + type=float, + help="shuffle this proportion of sentences in all inputs", + ) + parser.add_argument( + "--mask-length", + default="span-poisson", + type=str, + choices=["subword", "word", "span-poisson"], + help="mask length to choose", + ) + parser.add_argument( + "--replace-length", + default=1, + type=int, + help="when masking N tokens, replace with 0, 1, or N tokens (use -1 for N)", + ) + parser.add_argument( + "--iid-noise-target", + action="store_true", + help="whether to use t5 form target", + ) + # Hubert + parser.add_argument( + "--hubert-labels", + nargs="*", + type=str, + default=['km'], + help="extension of the label files to load, frame-level labels for pre-training, and sequence-level label for fine-tuning", + ) + parser.add_argument( + "--hubert-label-dir", + type=str, + default=None, + help="if set, looks for labels in this directory instead", + ) + parser.add_argument( + "--sample-rate", + default=100, + type=float, + help="target sample rate. 
audio files will be up/down sampled to this rate", + ) + parser.add_argument( + "--label-rates", + default=-1, + type=float, + help="if set, looks for labels in this directory instead", + ) + parser.add_argument( + "--normalize", + action="store_true", + help="if set, normalizes input to have 0 mean and unit variance", + ) + parser.add_argument( + "--enable-padding", + action="store_true", + help="pad shorter samples instead of cropping", + ) + parser.add_argument( + "--pad-audio", + action="store_true", + help="pad audio to the longest one in the batch if true", + ) + parser.add_argument( + "--random-crop", + action="store_true", + help="always crop from the beginning if false", + ) + parser.add_argument( + "--single-target", + action="store_true", + help="if set, AddTargetDatasets outputs same keys " + "as AddTargetDataset", + ) + parser.add_argument( + "--batch-ratio", + default=None, + type=str, + help="ratio of bach size for each dataset", + ) + parser.add_argument( + "--sample-ratios", + default=None, + type=str, + help="ratio of sample for each dataset", + ) + parser.add_argument( + "--ctc-weight", + type=float, + default=0.0, + help="ctc weight for inference", + ) + + def __init__(self, args, dicts, config): + super().__init__(args) + self.dicts = dicts + self.config = config + self.t5_task = args.t5_task + # Used for filter size + if self.t5_task in ['s2t', 't2s', 's2s', 's2c']: + self.max_pos = [self.args.max_speech_positions * 256] + elif self.t5_task == 'pretrain': + self.max_pos = [self.args.max_speech_positions * 256, self.args.max_text_positions] + + self.mask_idx = self.dicts["text"].add_symbol("") + # add blank token for ctc + # if args.ctc_weight > 0: + self.blank_symbol_idx = self.dicts["text"].add_symbol("") + self.blank_symbol = "" + + # add mask token + if hasattr(args, "iid_noise_target") and args.iid_noise_target: + self.uni_mask_idxs = [] + for i in range(600): + self.uni_mask_idxs.append(self.dicts["text"].add_symbol("" + str(i))) + self.uni_mask_idxs = torch.tensor(self.uni_mask_idxs) + + self.seed = args.seed + + @classmethod + def setup_task(cls, args, **kwargs): + # load dictionaries and config + dicts = OrderedDict() + if args.t5_task == 'pretrain' and not hasattr(args, "shuffle_instance"): + args.shuffle_instance = False + + # Prepare config + config = None + logger.info('No config file for ' + args.t5_task) + + if args.t5_task == "pretrain": + dicts["hubert"] = [Dictionary.load(f"{args.hubert_label_dir}/dict.{label}.txt") for label in args.hubert_labels] + dicts["text"] = Dictionary.load(op.join(args.data, "dict.txt")) + else: + if config is None: + dicts["text"] = Dictionary.load(op.join(args.data, "dict.txt")) + else: + dicts["text"] = Dictionary.load(op.join(args.data, config.vocab_filename)) + + return cls(args, dicts, config) + + def build_criterion(self, args): + from fairseq import criterions + return criterions.build_criterion(args, self) + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + sample_ratios = [] + if self.t5_task == "s2t": + ## For speech to text task + bpe_tokenizer = self.build_bpe(self.args) + manifest = f"{self.args.data}/{split}.tsv" + procs = [LabelEncoder(self.dicts["text"])] + paths = [f"{self.args.hubert_label_dir}/{split}.txt"] + self.datasets[split] = SpeechToTextDataset( + manifest, + sample_rate=self.args.sample_rate, + label_paths=paths, + label_processors=procs, + max_keep_sample_size=self.max_pos[0] if self.args.max_speech_sample_size is None else self.args.max_speech_sample_size, + 
min_keep_sample_size=self.args.min_speech_sample_size, + normalize=self.args.normalize, + store_labels=False, + tgt_dict=self.dicts["text"], + tokenizer=bpe_tokenizer, + ) + elif self.t5_task == "t2s": + ## For text to speech task + from fairseq.data import ConcatDataset + bpe_tokenizer = self.build_bpe(self.args) + procs = [LabelEncoder(self.dicts["text"])] + t2s_datasets = [ + TextToSpeechDataset( + manifest_path=f"{self.args.data}/{name}.tsv", + sample_rate=self.args.sample_rate, + label_paths=[f"{self.args.hubert_label_dir}/{name}.txt"], + label_processors=procs, + max_keep_sample_size=self.max_pos[0], + normalize=self.args.normalize, + store_labels=False, + src_dict=self.dicts["text"], + tokenizer=bpe_tokenizer, + reduction_factor=self.args.reduction_factor, + ) + for name in split.split(",") + ] + self.datasets[split] = ConcatDataset(t2s_datasets) if len(t2s_datasets) > 1 else t2s_datasets[0] + elif self.t5_task == "s2s": + manifest = f"{self.args.data}/{split}.tsv" + self.datasets[split] = SpeechToSpeechDataset( + manifest_path=manifest, + sample_rate=self.args.sample_rate, + max_keep_sample_size=self.max_pos[0] if self.args.max_speech_sample_size is None else self.args.max_speech_sample_size, + min_keep_sample_size=self.args.min_speech_sample_size, + normalize=self.args.normalize, + reduction_factor=self.args.reduction_factor, + ) + elif self.t5_task == "s2c": + is_train_split = ("train" in split) + is_valid_split = ("valid" in split) + if is_train_split: + max_length = 51200 + elif is_valid_split: + max_length = 76800 + else: + max_length = 2560000 + manifest = op.join(f"{self.args.data}", f"{split}.tsv") + procs = LabelEncoder(self.dicts["text"]) # map speaker to id + self.datasets[split] = SpeechToClassDataset( + manifest_path=manifest, + sample_rate=self.args.sample_rate, + label_processors=procs, + max_keep_sample_size=self.max_pos[0] if self.args.max_speech_sample_size is None else self.args.max_speech_sample_size, + min_keep_sample_size=self.args.min_speech_sample_size, + normalize=self.args.normalize, + tgt_dict=self.dicts["text"], + max_length=max_length + ) + elif self.t5_task == "pretrain": + is_train_split = ("train" in split) + pretrain_datasets = [] + speech_split, text_split = split.split('|') + + ## Speech pre-train + manifest = f"{self.args.data}/{speech_split}.tsv" + dicts = self.dicts["hubert"] + pad_list = [dict.pad() for dict in dicts] + eos_list = [dict.eos() for dict in dicts] + procs = [LabelEncoder(dict) for dict in dicts] + paths = [ + f"{self.args.hubert_label_dir}/{speech_split}.{l}" for l in self.args.hubert_labels + ] + # hubert v1: pad_audio=True, random_crop=False; + self.args.dec_weight = getattr(self.args, "dec_weight", 1.0) + pretrain_datasets.append( + SpeechPretrainDataset( + manifest, + sample_rate=self.args.sample_rate, + label_paths=paths, + label_rates=self.args.label_rates, + pad_list=pad_list, + eos_list=eos_list, + label_processors=procs, + max_keep_sample_size=None, + min_keep_sample_size=32000, + max_sample_size=self.args.max_speech_sample_size, + pad_audio=self.args.pad_audio, + normalize=self.args.normalize, + store_labels=False, + random_crop=self.args.random_crop, + single_target=self.args.single_target, + reduction_factor=self.args.reduction_factor, + ) + ) + sample_ratios.append(sum([pretrain_datasets[0].size(i) for i in range(len(pretrain_datasets[0]))])) + + ## Text pre-train + paths = utils.split_paths(self.args.data) + assert len(paths) > 0 + data_path = paths[(epoch - 1) % len(paths)] + split_path = op.join(data_path, 
text_split) + bart_dataset = data_utils.load_indexed_dataset( + split_path, + self.dicts["text"], + self.args.dataset_impl, + combine=combine, + ) + if bart_dataset is None: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(text_split, split_path) + ) + bart_dataset = StripTokenDataset(bart_dataset, self.dicts["text"].eos()) + bart_dataset = maybe_shorten_dataset( + bart_dataset, + text_split, + self.args.shorten_data_split_list, + self.args.shorten_method, + self.args.tokens_per_sample, + self.args.seed, + ) + # create continuous blocks of tokens + bart_dataset = TokenBlockDataset( + bart_dataset, + bart_dataset.sizes, + self.args.tokens_per_sample - 2, # one less for and one for + pad=self.dicts["text"].pad(), + eos=self.dicts["text"].eos(), + break_mode=self.args.sample_break_mode, + document_sep_len=0, + ) + # prepend beginning-of-sentence token (, equiv. to [CLS] in BERT) + bart_dataset = PrependTokenDataset(bart_dataset, self.dicts["text"].bos()) + bart_dataset = AppendTokenDataset(bart_dataset, self.dicts["text"].eos()) + mask_whole_words = ( + get_whole_word_mask(self.args, self.dicts["text"]) + if self.args.mask_length != "subword" + else None + ) + self.args.bert_weight = getattr(self.args, "bert_weight", 0.0) + pretrain_datasets.append( + TextPretrainDataset( + bart_dataset, + bart_dataset.sizes, + self.dicts["text"], + self.mask_idx, + mask_whole_words, + shuffle=self.args.shuffle_instance, + seed=self.seed, + args=self.args, + iid_noise_target=self.args.iid_noise_target, + uni_mask_idxs=self.uni_mask_idxs if self.args.iid_noise_target else None, + ) + ) + sample_ratios.append(sum(pretrain_datasets[1].sizes)) + logger.info( + "Task: {0}, Loaded {1} samples of denoising_dataset".format( + 'bart', + len(pretrain_datasets[1]), + ) + ) + + logger.info('token ratio is ' + str(sample_ratios)) + if self.args.batch_ratio is not None: + batch_ratio = eval(self.args.batch_ratio) + assert len(batch_ratio) == len(sample_ratios) + sample_ratios = [sample_ratios[i] / batch_ratio[i] for i in range(len(sample_ratios))] + else: + batch_ratio = None + max_size = max(sample_ratios) + sample_ratios = [max_size / r for r in sample_ratios] + if hasattr(self.args, "sample_ratios") and self.args.sample_ratios is not None: + sample_ratios = eval(self.args.sample_ratios) + if is_train_split: + self.datasets[split] = MultitaskDataset( + pretrain_datasets, sample_ratios, batch_ratio + ) + else: + self.datasets[split] = MultitaskDataset( + pretrain_datasets, batch_ratio=batch_ratio + ) + + def train_step( + self, sample, model, criterion, optimizer, update_num, ignore_grad=False + ): + model.train() + model.set_num_updates(update_num) + + # Junyi: not use sample_size, but normalize the loss locally + agg_loss, agg_sample_size, agg_logging_output = 0.0, 1.0, {} + agg_logging_output['sample_size'] = 1 + + def forward_backward(model, samples, weight=1.0): + nonlocal agg_loss, agg_logging_output + if samples is None or len(samples) == 0: + return + loss, sample_size, logging_output = criterion(model, samples) + if ignore_grad: + loss *= 0 + else: + loss *= weight + loss = loss / sample_size + optimizer.backward(loss) + agg_loss += loss.detach().item() + # # TODO make summing of the sample sizes configurable + for k in logging_output: + if k == 'ntokens' or k == 'nsentences': + if k not in agg_logging_output: + agg_logging_output[k] = 0 + agg_logging_output[k] += logging_output[k] + # continue + # agg_logging_output[k] += logging_output[k] + # agg_logging_output[task_name] += logging_output[k] + 
agg_logging_output[samples['task_name']] = logging_output + + forward_backward(model, sample) + + agg_logging_output["loss"] = agg_loss + + return agg_loss, agg_sample_size, agg_logging_output + + def valid_step(self, sample, model, criterion): + model.eval() + with torch.no_grad(): + from collections import defaultdict + + agg_loss, agg_sample_size, agg_logging_output = 0.0, 1.0, defaultdict(float) + agg_logging_output['sample_size'] = 1 + loss, sample_size, logging_output = criterion(model, sample) + loss = loss / sample_size + # agg_loss += loss.data.item() if isinstance(loss, torch.Tensor) else loss + agg_loss += loss.item() if isinstance(loss, torch.Tensor) else loss + agg_logging_output[sample['task_name']] = logging_output + agg_logging_output["loss"] = agg_loss + return agg_loss, agg_sample_size, agg_logging_output + + @property + def target_dictionary(self): + return self.dicts["text"] + + @property + def source_dictionary(self): + return None + + def build_model(self, args): + try: + args.input_feat_per_channel = self.config.input_feat_per_channel + args.input_channels = self.config.input_channels + except Exception as e: + args.input_feat_per_channel = 80 + args.input_channels = 1 + logger.info(f"Cannot set input_feat_per_channel, input_channels, since: ") + logger.warn(e) + logger.info(f"Set to: {args.input_feat_per_channel} and {args.input_channels}") + + args.speech_odim = args.input_feat_per_channel * args.input_channels + + args.label_rates = self.args.label_rates + args.sample_rate = self.args.sample_rate + self.args.reduction_factor = args.reduction_factor + return super(SpeechT5Task, self).build_model(args) + + def build_generator( + self, + models, + args, + seq_gen_cls=None, + extra_gen_cls_kwargs=None, + ): + from speecht5.sequence_generator import SequenceGenerator + extra_gen_cls_kwargs = { + "ctc_weight": self.args.ctc_weight, + **extra_gen_cls_kwargs + } + return super().build_generator( + models, args, seq_gen_cls=SequenceGenerator, extra_gen_cls_kwargs=extra_gen_cls_kwargs + ) + + def build_tokenizer(self, args): + if self.config is None: + logger.info(f"pre-tokenizer: None") + return encoders.build_tokenizer(Namespace(**{"tokenizer": None})) + else: + logger.info(f"pre-tokenizer: {self.config.pre_tokenizer}") + return encoders.build_tokenizer(Namespace(**self.config.pre_tokenizer)) + + def build_bpe(self, args): + if self.config is not None: + logger.info(f"tokenizer: {self.config.bpe_tokenizer}") + return encoders.build_bpe(Namespace(**self.config.bpe_tokenizer)) + else: + logger.info(f"tokenizer: {self.args.bpe_tokenizer}") + return encoders.build_bpe(Namespace(**{"bpe": "sentencepiece", "sentencepiece_model": self.args.bpe_tokenizer})) + + def generate_class(self, models, net_input, prefix_tokens, **kwargs): + with torch.no_grad(): + encoder_input = { + k: v for k, v in net_input.items() if k != "prev_output_tokens" and k != "task_name" + } + encoder_input.update(kwargs) + encoder_input.update({"prev_output_tokens": prefix_tokens}) + return models[0].generate_class(**encoder_input) + + def generate_speech(self, models, net_input, **kwargs): + with torch.no_grad(): + encoder_input = { + k: v for k, v in net_input.items() if k != "prev_output_tokens" and k != "task_name" + } + encoder_input.update(kwargs) + return models[0].generate_speech(**encoder_input) + + def inference_t2s( + self, models, sample + ): + with torch.no_grad(): + xs = sample['net_input']['src_tokens'] + spkemb = sample['net_input']['spkembs'] + return models[0].inference(xs, spkemb) + + def 
inference_s2s( + self, models, sample, force_equal_length=False + ): + with torch.no_grad(): + x = sample['net_input']['src_tokens'] + xlen = sample['net_input']['src_lengths'] + spkemb = sample['net_input']['spkembs'] + prev_output_tokens = sample['net_input']['prev_output_tokens'] + padding_mask = sample['net_input']['padding_mask'] + tgt_lengths = sample['net_input']['tgt_lengths'] + return models[0].inference_s2s(x, xlen, spkemb, prev_output_tokens, tgt_lengths, force_equal_length=force_equal_length, padding_mask=padding_mask) + + def inference_s2c( + self, models, sample + ): + with torch.no_grad(): + x = sample['net_input']['src_tokens'] + xlen = sample['net_input']['src_lengths'] + prev_output_tokens = sample['net_input']['prev_output_tokens'] + padding_mask = sample['net_input']['padding_mask'] + assert prev_output_tokens.size(1) == 1, prev_output_tokens.size() + return models[0].inference_s2c(x, xlen, prev_output_tokens, padding_mask=padding_mask) + + def filter_indices_by_size( + self, indices, dataset, max_positions=None, ignore_invalid_inputs=False + ): + """ + Filter examples that are too large + + Args: + indices (np.array): original array of sample indices + dataset (~fairseq.data.FairseqDataset): dataset to batch + max_positions (optional): max sentence length supported by the + model (default: None). + ignore_invalid_inputs (bool, optional): don't raise Exception for + sentences that are too long (default: False). + Returns: + np.array: array of filtered sample indices + """ + + indices, ignored = dataset.filter_indices_by_size( + indices, + self.max_pos + ) + return indices diff --git a/SpeechT5/SpeechT5/speecht5_framework.png b/SpeechT5/SpeechT5/speecht5_framework.png new file mode 100644 index 0000000000000000000000000000000000000000..1f86964fcb2a51c24b0a778365cc3733484ed470 Binary files /dev/null and b/SpeechT5/SpeechT5/speecht5_framework.png differ diff --git a/SpeechT5/SpeechUT/README.md b/SpeechT5/SpeechUT/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ca9425c781872c5589fca685e699d7c41ea08485 --- /dev/null +++ b/SpeechT5/SpeechUT/README.md @@ -0,0 +1,203 @@ +# SpeechUT + + + [**SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training**](https://arxiv.org/abs/2210.03730) + + +- (Done) Oct 2022: release the code and models +- Oct 2022: release preprint in [arXiv](https://arxiv.org/abs/2210.03730) + +## Pre-Trained and Fine-tuned Models +| Model | Pre-training Dataset (unlabeled) | Fine-tuning Dataset (labeled) | Model | +| :------: | :----------------------------------------------: | :-----------------: | :-----: | +| SpeechUT Base (ASR) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4asr_32gpu_1accum/checkpoint_298_400000.pt?sv=2020-04-08&st=2023-03-08T01%3A39%3A48Z&se=2024-03-09T01%3A39%3A00Z&sr=b&sp=r&sig=l3gJS1D%2BJfLfNfS3z33WjmSMGrOBJ63CvqGGewC6WeU%3D)| +| SpeechUT Base (ASR) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [100 hrs LibriSpeech](http://www.openslr.org/12) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/speechut_base_asr100h_checkpoint_best.pt?sv=2020-04-08&st=2023-03-08T01%3A41%3A22Z&se=2024-03-09T01%3A41%3A00Z&sr=b&sp=r&sig=%2B9lpGrqtZXa%2F6n1uZT%2Biey54ky31bYKSJytgfnBbbN4%3D)| +| SpeechUT Large (ASR) | [60k hrs LibriSpeech](http://www.openslr.org/12) + [40M 
Text](http://www.openslr.org/11) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/large_speechut4asr_32gpu_4accum/checkpoint_22_400k.pt?sv=2020-04-08&st=2023-03-08T01%3A42%3A10Z&se=2024-03-09T01%3A42%3A00Z&sr=b&sp=r&sig=TZNcsHQAqapyj%2BAvpHtl749kZy9flTkWm8P5L4W26qs%3D)| +| SpeechUT Large (ASR) | [60k hrs LibriSpeech](http://www.openslr.org/12) + [40M Text](http://www.openslr.org/11) | [960 hrs LibriSpeech](http://www.openslr.org/12) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/speechut_large_asr960h_checkpoint_best.pt?sv=2020-04-08&st=2023-03-08T01%3A43%3A02Z&se=2024-03-09T01%3A43%3A00Z&sr=b&sp=r&sig=PmO%2BgSAMXRgMC7GfpS4c%2BrDPsfJGekqUzD5AJm7RrYU%3D)| +| SpeechUT Base (En-De) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [408 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [4.6M Text](https://www.statmt.org/wmt16/) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4ende_32gpu_1accum/checkpoint_217_400000.pt?sv=2020-04-08&st=2023-03-08T01%3A43%3A47Z&se=2024-03-09T01%3A43%3A00Z&sr=b&sp=r&sig=XDEesMdGQ027j7YtpSql1kZtwgfv39gruOuWYlKlJ7w%3D)| +| SpeechUT Base (En-De) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [408 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [4.6M Text](https://www.statmt.org/wmt16/) | [En-De MuST-C v1](https://ict.fbk.eu/must-c/) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4ende_32gpu_1accum/fineutne_ende_checkpoint_avg.pt?sv=2020-04-08&st=2023-03-08T01%3A44%3A15Z&se=2024-03-09T01%3A44%3A00Z&sr=b&sp=r&sig=8dcenahRg46EJdwiHUalVBJgKra6JoSN7tUxdLAwzOM%3D)| +| SpeechUT Base (En-Es) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [504 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [15M Text](https://www.statmt.org/wmt13/) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4enes_32gpu_1accum/checkpoint_204_400000.pt?sv=2020-04-08&st=2023-03-08T01%3A48%3A16Z&se=2024-03-09T01%3A48%3A00Z&sr=b&sp=r&sig=hWoCM0y0SGZTD4CznC%2F5CejFczkqDYTOaISmlhCAYAU%3D)| +| SpeechUT Base (En-Es) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [504 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [15M Text](https://www.statmt.org/wmt13/) | [En-Es MuST-C v1](https://ict.fbk.eu/must-c/) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4enes_32gpu_1accum/fineutne_enes_checkpoint_avg.pt?sv=2020-04-08&st=2023-03-08T01%3A48%3A41Z&se=2024-03-09T01%3A48%3A00Z&sr=b&sp=r&sig=KGfzgKfKkDVQI0JxxnS%2BsYdBQzhUqFLQAVYG0OSGBtk%3D)| +| SpeechUT Base (En-Fr) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [492 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [40M Text](https://www.statmt.org/wmt14/) | - | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4enfr_32gpu_1accum/checkpoint_297_600000.pt?sv=2020-04-08&st=2023-03-08T01%3A49%3A09Z&se=2024-03-09T01%3A49%3A00Z&sr=b&sp=r&sig=1eqpXMLCjWpfyd7AiOHGzfk%2B8ZYqWwVWdHk1GqXgoeg%3D)| +| SpeechUT Base (En-Fr) | [960 hrs LibriSpeech](http://www.openslr.org/12) + [492 hrs MuST-C v1](https://ict.fbk.eu/must-c/) + [40M Text](https://www.statmt.org/wmt14/) | [En-Fr MuST-C v1](https://ict.fbk.eu/must-c/) | [Azure Storage](https://valle.blob.core.windows.net/share/speechut/base_speechut4enfr_32gpu_1accum/fineutne_enfr_checkpoint.pt?sv=2020-04-08&st=2023-03-08T01%3A49%3A34Z&se=2024-03-09T01%3A49%3A00Z&sr=b&sp=r&sig=i3jMAqvL1Vp7DRjACAbrdoQKhlv2Cmi40%2F14SJ6%2BoiU%3D)| + + +## Language Model +See 
[here](https://github.com/microsoft/SpeechT5/tree/main/Speech2C#language-model-and-vocabulary).
+
+
+## Setup
+
+```bash
+git submodule update --init SpeechUT/fairseq
+cd SpeechUT/
+pip install --editable fairseq/
+pip install sacrebleu==1.5.1
+```
+
+
+## ASR on LibriSpeech
+### Data preparation
+Please follow the wav2vec 2.0 manifest preparation steps [here](https://github.com/pytorch/fairseq/tree/main/examples/wav2vec#prepare-training-data-manifest) to prepare `train.tsv` and `train.ltr`. You should make sure the vocabulary [`dict.ltr.txt`](dataset/LibriSpeech/dict.ltr.txt) is the same as that used for the pre-trained model. Put your prepared data into `$data_dir`.
+
+### Fine-tune a hybrid CTC-ED model
+- Fine-tune the base model on the 100h subset
+  ```bash
+  # Usage: speechut/scripts/tune_speechut_asr/finetune_base_edctc.sh <model_path> <data_dir> <cpt-tag> [mount=$PWD] [world_size=8] [update_freq=2]
+  model_path=path/to/your/pre-trained/model
+  data_dir=dataset/LibriSpeech/asr
+  bash speechut/scripts/tune_speechut_asr/finetune_base_edctc.sh $model_path $data_dir 'tag400k'
+  ```
+
+- Fine-tune the large model on the 960h subset
+  ```bash
+  # Usage: speechut/scripts/tune_speechut_asr/finetune960h_large_edctc.sh <model_path> <data_dir> <cpt-tag> [mount=$PWD] [world_size=8] [update_freq=3]
+  model_path=path/to/your/pre-trained/model
+  data_dir=dataset/LibriSpeech/asr
+  bash speechut/scripts/tune_speechut_asr/finetune960h_large_edctc.sh $model_path $data_dir 'tag400k'
+  ```
+
+### Decode
+- CTC-ED joint decoding
+  ```bash
+  # Usage: speechut/scripts/tune_speechut_asr/inference_edctc.sh <model_path> <data_dir> [gen-set=dev_other] [beam_size=10] [ctc_weight=0.2] [--normalize]
+  model_path=path/to/your/fine-tuned/model
+  data_dir=dataset/LibriSpeech/asr
+  # for the base model
+  bash speechut/scripts/tune_speechut_asr/inference_edctc.sh $model_path $data_dir test_clean 10 0.2
+  # for the large model, you should add --normalize at the end
+  bash speechut/scripts/tune_speechut_asr/inference_edctc.sh $model_path $data_dir test_clean 10 0.2 --normalize
+  ```
+  > We use the [espnet](https://github.com/espnet/espnet)-style joint decoding algorithm, currently only supporting batch_size=1. If you find it too slow, please check [`inference_nj.sh`](speechut/scripts/tune_speechut_asr/inference_nj.sh) for a multi-thread version.
+
+- CTC-ED joint decoding with LM
+  ```bash
+  # Usage: speechut/scripts/tune_speechut_asr/inference_edctclm.sh <model_path> <data_dir> [gen-set=dev_other] [beam_size=30] [ctc_weight=0.3] [lm_weight=0.7] [lm_path] [--normalize]
+  model_path=path/to/your/fine-tuned/model
+  data_dir=dataset/LibriSpeech/asr
+  lm_path=path/to/char_lm/model
+  # for the base model
+  bash speechut/scripts/tune_speechut_asr/inference_edctclm.sh $model_path $data_dir test_clean 30 0.3 0.7 $lm_path
+  # for the large model, you should add --normalize at the end
+  bash speechut/scripts/tune_speechut_asr/inference_edctclm.sh $model_path $data_dir test_clean 30 0.3 0.7 $lm_path --normalize
+  ```
+
+  > We currently only support batch_size=1. If you find it too slow, please check [`inference_lm_nj.sh`](speechut/scripts/tune_speechut_asr/inference_lm_nj.sh) for a multi-thread version.
+
+  > The released language model uses a different vocabulary [`dict.txt`](dataset/LibriSpeech/dict.txt); put it into `$data_dir` and the script will access it.
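For intuition, the joint decoding score used above interpolates the encoder-decoder, CTC prefix, and (optional) language-model log-probabilities, weighted by `ctc_weight` and `lm_weight`. The snippet below is a minimal standalone sketch of that interpolation, not the actual decoder implementation; the tensors, weight values, and function name are made up for illustration.

```python
import torch

def joint_decoding_scores(ed_lprobs, ctc_prefix_lprobs, lm_lprobs,
                          ctc_weight=0.3, lm_weight=0.7):
    """Mix per-token log-probabilities the way joint CTC-ED (+LM) decoding does.

    All inputs have shape (beam, vocab). `ctc_prefix_lprobs` stands in for the
    incremental CTC prefix scores (score of the extended prefix minus the score
    of the current prefix).
    """
    mixed = (1.0 - ctc_weight) * ed_lprobs + ctc_weight * ctc_prefix_lprobs
    mixed = mixed + lm_weight * lm_lprobs  # shallow LM fusion
    return mixed

# Toy example: 2 beams over an 8-token vocabulary.
torch.manual_seed(0)
ed = torch.log_softmax(torch.randn(2, 8), dim=-1)
ctc = torch.log_softmax(torch.randn(2, 8), dim=-1)
lm = torch.log_softmax(torch.randn(2, 8), dim=-1)
print(joint_decoding_scores(ed, ctc, lm).topk(3, dim=-1).indices)  # top candidates per beam
```

In practice, a larger `ctc_weight` biases the beam toward hypotheses whose prefixes the CTC head also scores highly, while `lm_weight` controls the strength of shallow fusion with the external language model.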
+
+
+## ST on MuST-C
+### Data preparation
+
+ST models are fine-tuned with the [fairseq speech-to-text](https://github.com/facebookresearch/fairseq/tree/main/examples/speech_to_text) task, so just follow the data preparation instructions [here](https://github.com/facebookresearch/fairseq/tree/main/examples/speech_to_text#data-preparation).
+To fine-tune our released models, you should use the same sentencepiece models and dictionaries as ours:
+
+- En-De: [sentencepiece_model](dataset/MuSTC/en_de/spm_unigram10000.model), [dict](dataset/MuSTC/en_de/dict.spm.txt)
+- En-Es: [sentencepiece_model](dataset/MuSTC/en_es/spm_unigram10000.model), [dict](dataset/MuSTC/en_es/dict.spm.txt)
+- En-Fr: [sentencepiece_model](dataset/MuSTC/en_fr/spm_unigram10000.model), [dict](dataset/MuSTC/en_fr/dict.spm.txt)
+
+We provide examples in [`dataset`](dataset/MuSTC).
+
+### Fine-tune an encoder-decoder model
+
+```bash
+# Usage: speechut/scripts/tune_speechut_st/finetune_base_mustc_enxx.sh <model_path> <data_dir> <lang> <cpt-tag> [mount=$PWD] [world_size=8] [update_freq=4/6]
+model_path=path/to/your/pre-trained/model
+data_dir=dataset/MuSTC/en-${lang}
+bash speechut/scripts/tune_speechut_st/finetune_base_mustc_enxx.sh $model_path $data_dir ${lang} tag400k
+```
+Please check the script [`finetune_base_mustc_enxx.sh`](speechut/scripts/tune_speechut_st/finetune_base_mustc_enxx.sh) for detailed configuration.
+
+### Decode
+You might average several model checkpoints with the best dev accuracy to stabilize the performance:
+```bash
+python fairseq/scripts/average_checkpoints.py --inputs $model_dir/checkpoint.best_acc*.pt --output $model_dir/checkpoint.avgnbest.pt
+```
+Then decode the model with beam search:
+```bash
+# Usage: speechut/scripts/tune_speechut_st/inference_st.sh <model_path> <data_dir> <lang> [gen-set=dev] [beam_size=10] [lenpen=1.0]
+model_path=path/to/your/fine-tuned/model
+data_dir=dataset/MuSTC/en-${lang}
+bash speechut/scripts/tune_speechut_st/inference_st.sh $model_path $data_dir ${lang} tst-COMMON
+```
+
+
+
+
+## Pre-train for ASR
+
+### Data preparation
+The model is pre-trained on speech-to-unit, unit-to-text and mask-unit-lm tasks.
+1. For the speech-to-unit task, please follow the HuBERT data preparation steps [here](https://github.com/facebookresearch/fairseq/tree/main/examples/hubert#data-preparation).
+2. For the unit-to-text task, follow the steps below:
+   - Generate units from unpaired text with the [T2U Generator](#T2U-Generator).
+   - Pair the generated units with the text data and convert them to binary files.
+3. For the mask-unit-lm task, combine the units generated in steps 1 and 2.
+
+You should use [`dict.ltr.txt`](dataset/LibriSpeech/dict.ltr.txt) when preparing the text data; make sure the dictionary is the same as that used for fine-tuning.
+
+### Pre-train base model
+
+```bash
+# Usage: speechut/scripts/pretrain_speechut/base_speechut_for_asr.sh <data_dir> <text_data_dir> [mount=$PWD] [world_size=32] [update_freq=1]
+data_dir=
+text_data_dir=
+bash speechut/scripts/pretrain_speechut/base_speechut_for_asr.sh $data_dir $text_data_dir
+```
+
+## Pre-train for ST
+
+### Data preparation
+The model is pre-trained on speech-to-unit, unit-to-text and mask-unit-lm tasks.
+1. For the speech-to-unit task, please follow the HuBERT data preparation steps [here](https://github.com/facebookresearch/fairseq/tree/main/examples/hubert#data-preparation).
+2. For the unit-to-text task, we use bilingual text: the source side (i.e. English) is used to generate units and the target side serves as the output.
Follow the steps below:
+   - Normalize the source (English) text by removing punctuation and converting capital letters to lowercase.
+   - Generate units from the source (English) text with the [T2U Generator](#T2U-Generator).
+   - Pair the generated units with the text data and convert them to binary files.
+3. For the mask-unit-lm task, combine the units generated in steps 1 and 2.
+You should use the same sentencepiece models and dictionaries as those used for [fine-tuning](#ST-on-MuST-C).
+
+
+### Pre-train base model
+
+```bash
+# Usage: speechut/scripts/pretrain_speechut/base_speechut_for_st.sh <data_dir> <text_data_dir> <lang> [mount=$PWD] [world_size=32] [update_freq=1]
+data_dir=
+text_data_dir=
+bash speechut/scripts/pretrain_speechut/base_speechut_for_st.sh $data_dir $text_data_dir ${lang}
+```
+
+
+## T2U Generator
+The original paper trains an encoder-decoder model to generate reduced units from text, which is time-consuming due to the autoregressive generation.
+We recently updated the T2U generator to a non-autoregressive model, which generates non-reduced units (these can easily be post-processed into reduced units; a minimal sketch is given at the end of this README). Please follow the usage provided by [Hidden-unit Tokenizer for Text](https://github.com/microsoft/SpeechT5/tree/main/SpeechLM#hidden-unit-tokenizer-for-text) (it uses the same HuBERT units as this work).
+
+
+## License
+
+This project is licensed under the license found in the LICENSE file in the root directory of this source tree.
+Portions of the source code are based on the [FAIRSEQ](https://github.com/pytorch/fairseq) project.
+
+[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct)
+
+## Reference
+
+If you find our work useful in your research, please cite the following paper:
+
+```bibtex
+@article{zhang2022speechut,
+  title = {SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training},
+  author = {Zhang, Ziqiang and Zhou, Long and Ao, Junyi and Liu, Shujie and Dai, Lirong and Li, Jinyu and Wei, Furu},
+  eprint={2210.03730},
+  archivePrefix={arXiv},
+  primaryClass={cs.CL},
+  year={2022}
+}
+```
+
+### Contact Information
+
+For help or issues using SpeechUT models, please submit a GitHub issue.
+
+For other communications related to SpeechUT, please contact Long Zhou (`lozhou@microsoft.com`).
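+
+> As noted in the T2U Generator section above, the non-autoregressive generator outputs non-reduced units. A minimal, illustrative way to collapse them into reduced units is sketched below; the file names are placeholders.
+
+```bash
+# Collapse consecutive duplicate units, e.g. "52 52 52 8 8 91" -> "52 8 91".
+# Input: one utterance of space-separated unit IDs per line.
+awk '{
+  prev = ""; out = "";
+  for (i = 1; i <= NF; i++)
+    if ($i != prev) { out = (out == "" ? $i : out " " $i); prev = $i }
+  print out
+}' units_nonreduced.km > units_reduced.km
+```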
\ No newline at end of file diff --git a/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.km.txt b/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.km.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.km.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 
381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 409 +410 410 +411 411 +412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 +447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 +487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git a/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.kmu.txt b/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.kmu.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.kmu.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 
265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 409 +410 410 +411 411 +412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 +447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 +487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git a/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.ltr.txt b/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.ltr.txt new file mode 100644 index 0000000000000000000000000000000000000000..26a7e6ba309998c3868db7ecab5d7afa52a68e52 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.ltr.txt @@ -0,0 +1,29 @@ +| 803288730 +E 439294199 +T 319071758 +A 277306732 +O 263784364 +N 239361162 +I 237353011 +H 223346762 +S 220175453 +R 203352500 +D 152198685 +L 141597450 +U 98913389 +M 87138757 +C 84680142 +W 81375101 +F 80240665 +G 70642902 +Y 68388038 +P 58436929 +B 52538531 +V 33250231 +K 26906609 +' 9162896 +X 5075632 +J 4746771 +Q 3401794 +Z 2186971 + 1 diff --git a/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.txt b/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.txt new file mode 100644 index 0000000000000000000000000000000000000000..69929e1666c8182148d83ef4332e4c677bb90e5a --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/LibriSpeech/dict.txt @@ -0,0 +1,28 @@ +| 94802 +E 51860 +T 38431 +A 33152 +O 31495 +N 28855 +I 28794 +H 27187 +S 26071 +R 23546 +D 18289 +L 16308 +U 12400 +M 10685 +W 10317 +C 9844 +F 9062 +G 8924 +Y 8226 +P 6890 +B 6339 +V 3936 +K 3456 +' 1023 +X 636 +J 598 +Q 437 +Z 213 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/dict.km.txt b/SpeechT5/SpeechUT/dataset/MuSTC/dict.km.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ 
b/SpeechT5/SpeechUT/dataset/MuSTC/dict.km.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 409 +410 410 +411 411 
+412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 +447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 +487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/dict.kmu.txt b/SpeechT5/SpeechUT/dataset/MuSTC/dict.kmu.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/dict.kmu.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 
+298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 409 +410 410 +411 411 +412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 +447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 +487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_de/config.yaml b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dce5f63011a8c33a4d12eec569fdcc91ea299f68 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/config.yaml @@ -0,0 +1,3 @@ +vocab_filename: dict.spm.txt +src_vocab_filename: dict.kmu.txt + diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_de/config_ende.yaml b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/config_ende.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd080a05500211cade57d80056c8ce311ce4c0c2 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/config_ende.yaml @@ -0,0 +1,14 @@ +bpe_tokenizer: + bpe: sentencepiece + sentencepiece_model: spm_unigram10000.model + +sampling_alpha: 1.0 +shuffle: false +use_audio_input: true +use_sample_rate: 16000 + +vocab_filename: dict.spm.txt + +# required by speech_to_text task but never used +input_channels: 1 +input_feat_per_channel: 1 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_de/dict.kmu.txt b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/dict.kmu.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/dict.kmu.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 
62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 409 +410 410 +411 411 +412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 +447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 
465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 +487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_de/dict.spm.txt b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/dict.spm.txt new file mode 100644 index 0000000000000000000000000000000000000000..6f45c562c35023a09b76baa5bbbb38243ef0654c --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/dict.spm.txt @@ -0,0 +1,9997 @@ +, 1 +. 1 +▁die 1 +▁der 1 +en 1 +▁und 1 +s 1 +e 1 +▁ 1 +n 1 +▁in 1 +▁zu 1 +▁den 1 +▁von 1 +▁ist 1 +- 1 +▁das 1 +▁für 1 +er 1 +▁auf 1 +▁mit 1 +▁ein 1 +▁eine 1 +▁des 1 +▁nicht 1 +▁Sie 1 +▁wir 1 +▁dass 1 +▁es 1 +t 1 +▁im 1 +▁werden 1 +▁sich 1 +r 1 +▁dem 1 +▁ich 1 +▁an 1 +▁Die 1 +▁sind 1 +▁auch 1 +▁sie 1 +▁über 1 +▁um 1 +▁wird 1 +▁als 1 +: 1 +▁haben 1 +▁( 1 +▁aus 1 +▁wie 1 +es 1 +▁oder 1 +▁Ich 1 +) 1 +▁hat 1 +▁einer 1 +▁Das 1 +▁- 1 +d 1 +▁bei 1 +▁einen 1 +▁können 1 +m 1 +▁zur 1 +▁diese 1 +▁vor 1 +▁Wir 1 +▁er 1 +▁uns 1 +▁so 1 +▁nach 1 +▁einem 1 +ung 1 +▁Es 1 +▁durch 1 +▁" 1 +▁nur 1 +▁kann 1 +▁ver 1 +▁be 1 +? 1 +▁zum 1 +▁wenn 1 +▁In 1 +▁dieser 1 +y 1 +▁man 1 +▁aber 1 +▁war 1 +▁noch 1 +▁sein 1 +▁Der 1 +▁Kommission 1 +▁was 1 +st 1 +te 1 +▁wurde 1 +▁sehr 1 +k 1 +▁daß 1 +! 1 +▁müssen 1 +▁ge 1 +▁Herr 1 +▁S 1 +▁Und 1 +▁– 1 +▁mehr 1 +ten 1 +a 1 +in 1 +▁alle 1 +o 1 +▁diesem 1 +▁unter 1 +▁am 1 +be 1 +▁Ein 1 +" 1 +z 1 +de 1 +▁hier 1 +▁Union 1 +▁B 1 +▁möchte 1 +ge 1 +den 1 +▁ihre 1 +▁gibt 1 +▁Er 1 +▁E 1 +▁Europäischen 1 +f 1 +▁A 1 +al 1 +▁bis 1 +b 1 +▁Ver 1 +S 1 +l 1 +g 1 +▁dieses 1 +B 1 +an 1 +/ 1 +▁An 1 +▁Menschen 1 +▁unsere 1 +▁Be 1 +▁Bericht 1 +▁keine 1 +▁dann 1 +w 1 +▁muss 1 +▁eines 1 +▁Wenn 1 +▁K 1 +▁vom 1 +▁Präsident 1 +▁Zeit 1 +▁ab 1 +h 1 +▁Ihnen 1 +ver 1 +▁Welt 1 +▁zwischen 1 +▁EU 1 +▁mich 1 +▁habe 1 +▁immer 1 +▁Parlament 1 +le 1 +ing 1 +▁anderen 1 +T 1 +ch 1 +▁sowie 1 +▁da 1 +▁C 1 +C 1 +▁F 1 +i 1 +re 1 +der 1 +u 1 +c 1 +▁Europa 1 +; 1 +et 1 +▁Im 1 +▁damit 1 +D 1 +▁diesen 1 +ur 1 +▁Aus 1 +▁Hotel 1 +▁G 1 +P 1 +ieren 1 +sch 1 +▁zwei 1 +▁Mitgliedstaaten 1 +A 1 +M 1 +▁Diese 1 +F 1 +▁„ 1 +▁W 1 +▁de 1 +▁gegen 1 +ar 1 +▁Ab 1 +em 1 +ter 1 +▁Frau 1 +▁wurden 1 +▁M 1 +▁sondern 1 +▁Vor 1 +K 1 +lich 1 +sten 1 +▁andere 1 +▁neue 1 +▁unserer 1 +ä 1 +▁Jahr 1 +ern 1 +▁Entwicklung 1 +▁Aber 1 +la 1 +▁weil 1 +▁T 1 +▁heute 1 +▁Auf 1 +▁mir 1 +it 1 +▁seine 1 +▁wo 1 +▁machen 1 +▁Ihre 1 +ischen 1 +▁selbst 1 +▁viele 1 +▁Re 1 +▁Europäische 1 +▁un 1 +▁Jahren 1 +▁P 1 +▁etwas 1 +▁meine 1 +▁D 1 +▁ganz 1 +▁Mit 1 +▁viel 1 +▁Rat 1 +▁also 1 +▁Ge 1 +▁sollte 1 +▁Frage 1 +▁wieder 1 +“ 1 +▁jedoch 1 +zu 1 +ein 1 +iert 1 +us 1 +▁ohne 1 +li 1 +▁ihrer 1 +ig 1 +▁jetzt 1 +W 1 +E 1 +▁bereits 1 +▁sollten 1 +▁bin 1 +ö 1 +▁Sch 1 +▁würde 1 +▁L 1 +▁ihr 1 +el 1 +at 1 +ungen 1 +▁Land 1 +▁ob 1 +▁Arbeit 1 +▁geht 1 +G 1 +▁So 1 +is 1 +ut 1 +▁dazu 1 +▁neuen 1 +▁europäischen 1 +or 1 +▁sehen 1 +▁O 1 +▁3 1 +▁einige 1 +um 1 +me 1 +ab 1 +N 1 +▁H 1 +2 1 +▁a 1 +▁Da 1 +▁sagen 1 +▁re 1 +▁finden 1 +p 1 +se 1 +▁I 1 +▁einfach 1 +▁wirklich 1 +▁Wie 1 +▁schon 1 +▁weiter 1 +▁Jahre 1 +igen 1 +▁darauf 1 +ra 1 +▁19 1 +▁tun 1 +il 1 +ta 1 +on 1 +▁Art 1 +' 1 +▁Recht 1 +H 1 +▁dafür 1 +ische 1 +▁1 1 +▁Unternehmen 1 +gen 1 +O 1 +I 1 +▁Teil 1 +▁the 1 +ma 1 +▁wollen 1 +▁Ziel 1 +ne 1 +▁Problem 1 +▁gut 1 +▁2 1 +▁allen 1 +ste 1 +▁Was 1 +▁R 1 +ri 1 +▁Um 1 +▁waren 1 +ed 1 +▁seiner 1 +é 1 +▁liegt 1 +▁Eine 1 +▁denn 1 +▁Maßnahmen 1 +▁pro 1 +ie 1 +R 1 +ir 1 +▁Bürger 1 +▁ihren 
1 +ation 1 +▁nun 1 +▁möglich 1 +▁Bereich 1 +▁Nach 1 +▁La 1 +▁Le 1 +▁drei 1 +▁du 1 +▁Daten 1 +▁hatte 1 +▁k 1 +ü 1 +▁Zu 1 +▁zusammen 1 +▁während 1 +▁f 1 +▁Lage 1 +bar 1 +aus 1 +▁wissen 1 +▁bietet 1 +▁Informationen 1 +▁zurück 1 +liche 1 +to 1 +▁Grund 1 +U 1 +ag 1 +▁allem 1 +L 1 +▁Herrn 1 +ft 1 +schaft 1 +ro 1 +▁seit 1 +▁Dies 1 +▁doch 1 +▁N 1 +ierung 1 +▁— 1 +ia 1 +00 1 +▁Länder 1 +▁wäre 1 +▁unseren 1 +▁wichtig 1 +▁natürlich 1 +▁Z 1 +▁geben 1 +ol 1 +▁Leben 1 +▁Man 1 +▁St 1 +1 1 +▁ersten 1 +lichen 1 +isch 1 +▁d 1 +▁Bei 1 +chen 1 +▁könnte 1 +▁Rahmen 1 +▁Stadt 1 +ik 1 +un 1 +v 1 +). 1 +▁Fall 1 +▁einmal 1 +▁Euro 1 +▁ent 1 +▁Ma 1 +und 1 +▁denen 1 +x 1 +▁Für 1 +▁welche 1 +▁Un 1 +▁davon 1 +▁lassen 1 +▁Kinder 1 +▁Vorschlag 1 +▁Se 1 +3 1 +mal 1 +▁Zimmer 1 +kt 1 +... 1 +▁darüber 1 +▁Energie 1 +▁4 1 +▁Weg 1 +▁V 1 +▁ihnen 1 +▁Beispiel 1 +▁alles 1 +▁letzten 1 +▁hin 1 +▁soll 1 +ner 1 +▁of 1 +▁10 1 +ka 1 +▁b 1 +▁Parlaments 1 +▁Regierung 1 +▁insbesondere 1 +▁Unterstützung 1 +▁meiner 1 +▁dabei 1 +▁and 1 +▁erhalten 1 +▁kein 1 +ck 1 +▁kommen 1 +ige 1 +▁große 1 +▁dort 1 +▁Sicherheit 1 +ul 1 +zeit 1 +▁Zusammenarbeit 1 +▁genau 1 +▁Hier 1 +▁Ländern 1 +▁stehen 1 +▁Als 1 +th 1 +▁beim 1 +im 1 +▁Namen 1 +V 1 +▁Frauen 1 +as 1 +▁Fragen 1 +stand 1 +▁besteht 1 +▁Politik 1 +id 1 +heit 1 +▁Mittel 1 +land 1 +▁5 1 +os 1 +am 1 +▁System 1 +▁sch 1 +▁Zukunft 1 +▁% 1 +▁großen 1 +▁steht 1 +ad 1 +ze 1 +▁m 1 +▁Weise 1 +her 1 +▁Bedeutung 1 +▁direkt 1 +▁Ende 1 +▁Markt 1 +▁Ihr 1 +▁Seite 1 +▁unser 1 +▁Probleme 1 +au 1 +▁Kollegen 1 +▁worden 1 +▁stellen 1 +▁... 1 +▁De 1 +▁Möglichkeit 1 +▁Kunden 1 +lo 1 +▁Thema 1 +▁aller 1 +▁erste 1 +▁all 1 +▁Kommissar 1 +▁Tag 1 +da 1 +▁Ha 1 +ungs 1 +ha 1 +▁Geld 1 +▁Sp 1 +▁sicher 1 +▁Mal 1 +▁eigenen 1 +▁Ro 1 +ic 1 +▁Unter 1 +na 1 +auf 1 +▁muß 1 +▁Pro 1 +▁to 1 +▁Meinung 1 +▁klar 1 +▁Rolle 1 +▁Wasser 1 +▁Schutz 1 +▁Umwelt 1 +ien 1 +▁USA 1 +ent 1 +ler 1 +ben 1 +▁gegenüber 1 +▁politischen 1 +▁Programm 1 +▁seinen 1 +j 1 +▁sogar 1 +▁Staaten 1 +▁z 1 +▁ihn 1 +▁daher 1 +▁daran 1 +politik 1 +go 1 +do 1 +ster 1 +ach 1 +▁Über 1 +▁la 1 +▁erreichen 1 +ti 1 +bau 1 +enden 1 +▁Vi 1 +men 1 +▁ja 1 +ber 1 +gehen 1 +▁will 1 +end 1 +▁gerade 1 +he 1 +▁Verfügung 1 +▁besonders 1 +▁Ort 1 +▁gemacht 1 +▁jeder 1 +▁20 1 +gel 1 +▁unterstützen 1 +▁gehen 1 +▁etwa 1 +▁kommt 1 +▁weitere 1 +▁fest 1 +▁Millionen 1 +▁Al 1 +▁Situation 1 +▁Internet 1 +▁bekannt 1 +cht 1 +ca 1 +▁zwar 1 +▁Haus 1 +▁Dinge 1 +▁vielleicht 1 +▁macht 1 +▁Finanz 1 +▁weit 1 +mp 1 +▁glaube 1 +ce 1 +▁weniger 1 +▁europäische 1 +▁Auch 1 +▁verschiedenen 1 +▁sowohl 1 +hn 1 +ke 1 +är 1 +▁innerhalb 1 +▁Punkt 1 +▁Bo 1 +▁bringen 1 +▁Gesellschaft 1 +▁Am 1 +▁führen 1 +▁sagte 1 +ende 1 +min 1 +ger 1 +▁Lösung 1 +▁Alle 1 +halt 1 +ion 1 +co 1 +▁Spiel 1 +▁vielen 1 +ist 1 +▁Hilfe 1 +di 1 +▁meisten 1 +▁entfernt 1 +hin 1 +), 1 +▁hinaus 1 +vor 1 +▁gesagt 1 +▁Gemeinschaft 1 +ff 1 +▁Seiten 1 +▁werde 1 +sa 1 +▁Form 1 +▁mein 1 +che 1 +▁Ihrer 1 +ß 1 +▁politische 1 +▁besser 1 +ment 1 +igkeit 1 +▁voll 1 +▁Geschichte 1 +▁Richtlinie 1 +▁schnell 1 +▁bieten 1 +▁Li 1 +▁indem 1 +▁stellt 1 +▁c 1 +▁Wirtschaft 1 +▁nehmen 1 +▁würden 1 +▁internationalen 1 +▁weiß 1 +▁U 1 +▁Haupt 1 +▁Projekt 1 +so 1 +▁Artikel 1 +ll 1 +▁unserem 1 +▁beiden 1 +io 1 +▁wirtschaftliche 1 +EN 1 +ell 1 +▁Leute 1 +teil 1 +▁Kon 1 +▁Zusammenhang 1 +ex 1 +▁hatten 1 +▁deutlich 1 +▁bedeutet 1 +▁6 1 +ko 1 +▁befindet 1 +iz 1 +▁vier 1 +▁Preis 1 +ür 1 +▁18 1 +▁brauchen 1 +▁tatsächlich 1 +▁möchten 1 +▁15 1 +werk 1 +spiel 1 +▁Bild 1 +vi 1 +recht 1 +▁schaffen 1 +▁J 1 +sp 1 +fe 1 +ph 1 +zi 1 +op 1 +om 1 +▁nämlich 1 +▁solche 1 +▁sei 1 +ität 1 +Z 1 +▁Natur 
1 +raum 1 +▁ihm 1 +▁China 1 +ier 1 +▁Kultur 1 +▁Fa 1 +ho 1 +ub 1 +▁gute 1 +▁Co 1 +▁Mar 1 +▁Mo 1 +▁wenig 1 +▁Rechts 1 +pro 1 +ine 1 +▁denke 1 +▁Deshalb 1 +6 1 +▁bereit 1 +▁darin 1 +com 1 +ken 1 +▁Neu 1 +art 1 +▁Bar 1 +▁Dank 1 +gang 1 +▁Ja 1 +iv 1 +form 1 +▁Uhr 1 +▁Do 1 +▁gab 1 +▁bitte 1 +▁Durch 1 +▁' 1 +▁Entschließung 1 +▁Ka 1 +▁le 1 +▁könnten 1 +▁wichtige 1 +▁Ebene 1 +isten 1 +▁Du 1 +▁w 1 +▁Ihren 1 +ve 1 +▁Personen 1 +▁He 1 +ale 1 +wand 1 +▁Wahl 1 +▁Hand 1 +▁Kosten 1 +▁zeigt 1 +▁Wirtschafts 1 +gabe 1 +▁leben 1 +▁jeden 1 +and 1 +▁Ar 1 +▁Industrie 1 +▁unterstützt 1 +▁Verbraucher 1 +▁Entscheidung 1 +▁sprechen 1 +ton 1 +▁Ca 1 +▁kleine 1 +ga 1 +▁Dieses 1 +▁gleich 1 +halten 1 +wa 1 +▁Präsidentin 1 +ismus 1 +▁notwendig 1 +▁lange 1 +weise 1 +▁Haushalts 1 +▁erst 1 +los 1 +man 1 +▁Zugang 1 +▁Ansicht 1 +▁Lebens 1 +▁Bezug 1 +▁Pa 1 +▁arbeiten 1 +hi 1 +▁Region 1 +bereich 1 +▁keinen 1 +000 1 +▁7 1 +▁neu 1 +▁Fraktion 1 +▁Raum 1 +▁Jahres 1 +haus 1 +▁Dienstleistung 1 +tel 1 +▁ihrem 1 +▁dessen 1 +▁Prozess 1 +▁Rück 1 +tag 1 +mm 1 +sicht 1 +▁Also 1 +age 1 +ren 1 +▁nutzen 1 +▁Rates 1 +▁seinem 1 +geben 1 +gt 1 +kommen 1 +▁The 1 +▁heraus 1 +▁Arbeits 1 +stellen 1 +▁hoch 1 +▁Produkte 1 +▁st 1 +▁ausge 1 +% 1 +keit 1 +▁Abstimmung 1 +▁nächsten 1 +mo 1 +▁unterschiedlich 1 +▁Unsere 1 +▁Demokratie 1 +▁Erfolg 1 +▁30 1 +▁einigen 1 +▁gehört 1 +▁Dieser 1 +pa 1 +▁handelt 1 +lä 1 +va 1 +▁gilt 1 +▁entwickelt 1 +ly 1 +0 1 +▁konnte 1 +▁schwer 1 +▁Ent 1 +Berichterstatter 1 +tt 1 +tritt 1 +nt 1 +▁Lo 1 +▁darf 1 +▁Sa 1 +▁Anwendung 1 +son 1 +▁öffentlichen 1 +▁Verfahren 1 +▁12 1 +▁Aussprache 1 +lu 1 +▁besten 1 +ick 1 +▁sollen 1 +▁erreicht 1 +▁Rechte 1 +▁fünf 1 +▁verwendet 1 +ert 1 +▁Service 1 +pi 1 +log 1 +▁Geschäfts 1 +▁Ho 1 +▁Menschenrechte 1 +bo 1 +▁t 1 +▁Grundlage 1 +▁Richtung 1 +▁weltweit 1 +▁Software 1 +mittel 1 +▁Website 1 +▁Europas 1 +▁Bi 1 +▁Verbindung 1 +▁weiterhin 1 +▁ebenfalls 1 +▁Verantwortung 1 +pe 1 +uch 1 +▁hätte 1 +▁nationalen 1 +än 1 +▁Doch 1 +▁Deutschland 1 +ug 1 +no 1 +▁Debatte 1 +lei 1 +ry 1 +akt 1 +5 1 +▁Ra 1 +▁kleinen 1 +▁Me 1 +ak 1 +▁deshalb 1 +▁Wettbewerb 1 +▁dürfen 1 +▁betrifft 1 +tra 1 +▁Bau 1 +▁zeigen 1 +▁bzw 1 +lichkeit 1 +▁Qualität 1 +bel 1 +schutz 1 +▁Tatsache 1 +▁gemeinsame 1 +▁Bevölkerung 1 +▁Gebiet 1 +hr 1 +▁ca 1 +▁Na 1 +bi 1 +system 1 +▁wahr 1 +▁ange 1 +▁Sozial 1 +elt 1 +▁2009 1 +▁Krieg 1 +per 1 +▁hoffe 1 +4 1 +▁Vereinigte 1 +ok 1 +ja 1 +▁eigentlich 1 +▁meinen 1 +ive 1 +▁Reihe 1 +weg 1 +▁aufgrund 1 +ling 1 +▁Antwort 1 +▁Umsetzung 1 +▁Pi 1 +▁halten 1 +all 1 +zen 1 +▁bekommen 1 +▁abge 1 +▁fast 1 +▁Reise 1 +ierte 1 +▁oft 1 +▁8 1 +▁verstehen 1 +▁h 1 +gi 1 +ard 1 +▁ganze 1 +▁sozialen 1 +▁auszu 1 +ium 1 +reg 1 +▁Vertrag 1 +▁Interesse 1 +▁& 1 +pl 1 +rat 1 +fa 1 +ant 1 +▁meinem 1 +▁g 1 +▁kurz 1 +▁darum 1 +▁Abkommen 1 +▁Ziele 1 +Änderungsanträge 1 +▁Daher 1 +▁Wert 1 +ün 1 +▁recht 1 +▁helfen 1 +▁Produkt 1 +▁Wort 1 +nehmen 1 +▁statt 1 +▁führt 1 +tal 1 +▁Schritt 1 +▁nie 1 +▁Partner 1 +▁50 1 +▁Selbst 1 +rü 1 +▁bleiben 1 +ou 1 +ER 1 +▁dar 1 +if 1 +rie 1 +▁paar 1 +▁Bedingungen 1 +▁se 1 +▁US 1 +▁p 1 +ord 1 +ci 1 +mon 1 +igung 1 +▁später 1 +▁wohl 1 +▁Musik 1 +▁heißt 1 +ör 1 +fi 1 +▁Nicht 1 +mit 1 +lassen 1 +▁Auswirkungen 1 +ow 1 +sta 1 +sk 1 +acht 1 +kon 1 +▁Herausforderung 1 +port 1 +▁richtig 1 +tro 1 +einander 1 +sche 1 +▁Gr 1 +anz 1 +▁erforderlich 1 +St 1 +▁frei 1 +▁jede 1 +▁Ta 1 +eln 1 +▁Nun 1 +▁Druck 1 +▁Ergebnis 1 +▁Ihrem 1 +up 1 +cken 1 +haft 1 +▁di 1 +▁Text 1 +fen 1 +▁Kontrolle 1 +▁Möglichkeiten 1 +▁Mai 1 +stellung 1 +▁Ch 1 +▁Institutionen 1 +▁gemeinsam 1 +▁verschiedene 1 +▁findet 1 +ot 1 +▁per 1 +weisen 1 
+gleich 1 +▁internationale 1 +▁beispielsweise 1 +lle 1 +▁Vorschläge 1 +▁allerdings 1 +▁Macht 1 +▁verbunden 1 +▁jedes 1 +nd 1 +▁jedem 1 +▁hinter 1 +▁gestellt 1 +▁je 1 +▁gemeinsamen 1 +▁Von 1 +▁Ri 1 +▁eigene 1 +lass 1 +▁for 1 +ill 1 +▁Ne 1 +▁Design 1 +▁Ko 1 +▁Strategie 1 +du 1 +In 1 +▁gleichzeitig 1 +▁ermöglicht 1 +▁Person 1 +20 1 +schw 1 +▁Dr 1 +▁Hinblick 1 +pf 1 +▁Handels 1 +fer 1 +▁Forschung 1 +▁her 1 +mi 1 +▁Damen 1 +gu 1 +reich 1 +ng 1 +gr 1 +X 1 +verständlich 1 +▁einzige 1 +mäßig 1 +▁Version 1 +ei 1 +▁Verordnung 1 +▁Minuten 1 +sh 1 +▁Blick 1 +▁Prozent 1 +ld 1 +á 1 +▁ebenso 1 +▁gesamten 1 +isierung 1 +▁ging 1 +sion 1 +▁Buch 1 +▁Sicherheits 1 +▁Einsatz 1 +▁leicht 1 +▁Ausdruck 1 +▁Ergebnisse 1 +▁sieht 1 +arbeit 1 +▁enthalten 1 +ich 1 +▁sp 1 +▁Landes 1 +▁Funktion 1 +▁Computer 1 +weit 1 +▁9 1 +▁Familien 1 +( 1 +▁Nähe 1 +▁einzelnen 1 +▁Kunst 1 +▁warum 1 +handel 1 +tri 1 +▁Auto 1 +▁Aufgabe 1 +lage 1 +▁Meine 1 +spolitik 1 +▁Beziehungen 1 +▁gegeben 1 +▁Video 1 +▁völlig 1 +▁Freiheit 1 +Lachen 1 +▁100 1 +ellen 1 +unter 1 +▁einzu 1 +wei 1 +▁14 1 +▁Liste 1 +▁soziale 1 +gestellt 1 +▁Russland 1 +arbeiten 1 +▁Hotels 1 +” 1 +elle 1 +▁schwierig 1 +▁nichts 1 +bild 1 +▁Interessen 1 +▁liegen 1 +▁angenommen 1 +▁/ 1 +▁Erweiterung 1 +trag 1 +markt 1 +▁bevor 1 +▁Regionen 1 +▁zweite 1 +sektor 1 +nder 1 +träge 1 +▁1. 1 +▁Platz 1 +▁setzen 1 +Mail 1 +zug 1 +▁Film 1 +ate 1 +del 1 +▁Berg 1 +innen 1 +iger 1 +▁Änderungsantrag 1 +▁ganzen 1 +▁Kampf 1 +▁Po 1 +▁Gewalt 1 +▁Zum 1 +▁versuchen 1 +ip 1 +sel 1 +▁spielen 1 +▁danken 1 +▁Gruppe 1 +ere 1 +▁Verhandlungen 1 +▁schriftlich 1 +ierten 1 +▁ma 1 +▁Groß 1 +▁versucht 1 +ud 1 +ah 1 +über 1 +▁Abgeordneten 1 +▁Gelegenheit 1 +berg 1 +▁Mitarbeiter 1 +▁lang 1 +▁Technologie 1 +hm 1 +▁Herren 1 +hilfe 1 +▁16 1 +tlich 1 +▁To 1 +tu 1 +▁Gefahr 1 +Re 1 +▁2008 1 +▁Zahl 1 +▁Seit 1 +platz 1 +ktion 1 +▁Tat 1 +▁Position 1 +rit 1 +▁Kolleginnen 1 +greifen 1 +lang 1 +tik 1 +▁Kraft 1 +▁2010 1 +▁17 1 +▁Fehler 1 +▁Nutzung 1 +▁San 1 +▁Gesundheit 1 +▁Sache 1 +po 1 +▁25 1 +▁allein 1 +▁Ba 1 +▁verfügt 1 +▁Ex 1 +▁Türkei 1 +▁Mitglieder 1 +_ 1 +▁Informations 1 +igt 1 +ziel 1 +▁besondere 1 +▁Erfahrung 1 +länder 1 +tre 1 +▁Krise 1 +ang 1 +▁ex 1 +▁Außerdem 1 +▁wegen 1 +fall 1 +▁Wa 1 +▁Staats 1 +▁Kontakt 1 +▁Sport 1 +rt 1 +kel 1 +▁Förderung 1 +▁geführt 1 +setzung 1 +▁Beitrag 1 +▁Idee 1 +▁Dialog 1 +Ver 1 +▁Sta 1 +▁gerne 1 +▁entwickeln 1 +▁X 1 +▁kam 1 +Gelächter 1 +▁rund 1 +leben 1 +▁bleibt 1 +." 
1 +ac 1 +kraft 1 +icht 1 +▁Bilder 1 +▁Gra 1 +▁erfolgreich 1 +▁Wer 1 +fang 1 +▁Anfang 1 +▁mehrere 1 +▁Themen 1 +ni 1 +legen 1 +▁äußerst 1 +ru 1 +▁verbessern 1 +▁befinden 1 +pt 1 +▁neben 1 +▁Verbesserung 1 +cher 1 +▁Bereichen 1 +▁Sektor 1 +rk 1 +ator 1 +ki 1 +▁Ausschuss 1 +kosten 1 +pp 1 +▁Ausschusses 1 +▁Luft 1 +▁abzu 1 +▁mal 1 +▁denken 1 +ort 1 +▁Meer 1 +▁Modell 1 +sehen 1 +▁jemand 1 +mmer 1 +est 1 +wert 1 +▁Gebäude 1 +▁gar 1 +isieren 1 +za 1 +▁Unser 1 +▁verwenden 1 +bt 1 +▁Steuer 1 +▁Zentrum 1 +▁gestimmt 1 +▁13 1 +voll 1 +▁is 1 +dem 1 +7 1 +▁stark 1 +▁“ 1 +▁Kl 1 +nis 1 +▁u 1 +▁Jahrhundert 1 +ssen 1 +▁konnten 1 +führung 1 +▁lässt 1 +▁unabhängig 1 +tes 1 +ron 1 +▁ermöglichen 1 +▁stärker 1 +schlag 1 +satz 1 +▁Körper 1 +tet 1 +▁Regierungen 1 +die 1 +net 1 +ähr 1 +serv 1 +ina 1 +erung 1 +org 1 +▁Woche 1 +’ 1 +▁Web 1 +steuer 1 +▁Fl 1 +▁Behörden 1 +▁New 1 +▁Damit 1 +▁gleichen 1 +▁zweiten 1 +mel 1 +▁Familie 1 +▁unseres 1 +▁Konferenz 1 +▁km 1 +▁Augen 1 +sam 1 +▁Wissen 1 +▁Menge 1 +▁con 1 +ap 1 +▁obwohl 1 +setzen 1 +▁Flug 1 +▁ändern 1 +fr 1 +▁Wachstum 1 +▁Benutzer 1 +▁Tage 1 +▁Licht 1 +▁erklärt 1 +one 1 +alen 1 +▁See 1 +▁Wo 1 +stelle 1 +▁bestimmte 1 +] 1 +▁kon 1 +▁Konflikt 1 +fahrt 1 +▁Arbeitnehmer 1 +our 1 +▁deine 1 +▁Landwirtschaft 1 +folge 1 +▁Lissabon 1 +tr 1 +▁wichtigen 1 +▁wichtigsten 1 +gegeben 1 +▁Anzahl 1 +▁bestehen 1 +wo 1 +▁Lu 1 +▁El 1 +des 1 +▁Zeitpunkt 1 +▁National 1 +▁Team 1 +ika 1 +▁großer 1 +▁Fischerei 1 +▁fördern 1 +▁gesamte 1 +▁2007 1 +▁Schaffung 1 +▁Dann 1 +▁scheint 1 +▁Sicht 1 +▁hinsichtlich 1 +▁Reform 1 +nde 1 +▁wollte 1 +hal 1 +▁24 1 +▁Linie 1 +▁Wein 1 +▁Einzel 1 +▁hohe 1 +▁Standpunkt 1 +▁Je 1 +▁Frankreich 1 +▁größte 1 +ue 1 +rin 1 +arm 1 +▁Entwicklungs 1 +▁häufig 1 +▁weiteren 1 +▁öffentliche 1 +llen 1 +▁Sprache 1 +▁bisher 1 +▁Gemeinschafts 1 +ide 1 +▁Notwendigkeit 1 +▁ständig 1 +äu 1 +mb 1 +mer 1 +bl 1 +▁Ki 1 +▁Werk 1 +▁wobei 1 +last 1 +rufen 1 +▁passiert 1 +▁gesehen 1 +▁gebracht 1 +▁Binnenmarkt 1 +heim 1 +ts 1 +qui 1 +▁Gesamt 1 +▁Verwendung 1 +▁sagt 1 +zahl 1 +wirtschaft 1 +▁Angebot 1 +▁anderer 1 +si 1 +ana 1 +J 1 +▁Moment 1 +istischen 1 +vo 1 +▁Produktion 1 +eur 1 +▁nächste 1 +▁Con 1 +ran 1 +▁anzu 1 +▁Kind 1 +Be 1 +the 1 +▁Behandlung 1 +▁Ad 1 +▁[ 1 +stein 1 +zer 1 +▁Di 1 +bu 1 +▁Bildung 1 +▁Denn 1 +eu 1 +▁kennen 1 +▁Ni 1 +ign 1 +▁Dollar 1 +bei 1 +str 1 +▁Während 1 +▁sechs 1 +ischer 1 +zeug 1 +setzt 1 +feld 1 +▁Währung 1 +wer 1 +▁tragen 1 +▁Medien 1 +▁2000 1 +▁genug 1 +▁Juni 1 +▁Pla 1 +▁Mitglied 1 +▁Mann 1 +▁derzeit 1 +▁größere 1 +▁wer 1 +▁Restaurant 1 +▁Handel 1 +programm 1 +▁Tra 1 +kl 1 +]] 1 +▁hätten 1 +An 1 +fahren 1 +res 1 +▁Umgebung 1 +▁Mehrheit 1 +lau 1 +▁durchgeführt 1 +▁Vertrags 1 +Applaus 1 +▁Tri 1 +▁Frei 1 +▁Zweck 1 +▁Afrika 1 +üt 1 +30 1 +▁letzte 1 +▁Y 1 +▁Verfassung 1 +industrie 1 +▁Vertreter 1 +▁No 1 +welt 1 +jahr 1 +hl 1 +▁40 1 +▁meines 1 +▁Finanzierung 1 +ob 1 +▁Lösungen 1 +▁Bitte 1 +seite 1 +ay 1 +▁Einführung 1 +frage 1 +▁Fortschritte 1 +▁falsch 1 +je 1 +▁enthält 1 +". 
1 +▁ausgestattet 1 +▁Beschäftigung 1 +▁tief 1 +▁li 1 +▁Stunden 1 +▁anders 1 +ös 1 +▁treffen 1 +▁Her 1 +▁wirtschaftlich 1 +▁braucht 1 +▁genannt 1 +▁Ju 1 +▁Lebensmittel 1 +▁gehören 1 +▁sorgen 1 +schau 1 +blick 1 +schen 1 +▁Jo 1 +▁v 1 +stre 1 +▁geeignet 1 +▁Gu 1 +▁Darüber 1 +treib 1 +о 1 +▁eindeutig 1 +fass 1 +▁wahrscheinlich 1 +Y 1 +▁komplett 1 +▁schließlich 1 +wi 1 +wissenschaftlich 1 +mann 1 +istische 1 +▁Integration 1 +▁he 1 +▁Te 1 +netz 1 +▁geschlossen 1 +zimmer 1 +▁sa 1 +▁Innovation 1 +▁Folgen 1 +▁Fahr 1 +▁guten 1 +▁lediglich 1 +nahme 1 +▁neuer 1 +▁Entscheidungen 1 +▁praktisch 1 +▁0 1 +▁Tier 1 +▁Instrument 1 +8 1 +50 1 +▁nahe 1 +▁Milliarden 1 +▁[[ 1 +iti 1 +▁All 1 +▁getan 1 +▁glauben 1 +▁Konzept 1 +▁verfügen 1 +ven 1 +▁Änderung 1 +▁zunächst 1 +itz 1 +▁Diskussion 1 +▁erwähnt 1 +▁Zwei 1 +▁Gruppen 1 +▁we 1 +▁Stelle 1 +har 1 +schließen 1 +▁Gesundheits 1 +▁Aufmerksamkeit 1 +met 1 +hör 1 +▁Mi 1 +▁Schl 1 +▁Herzen 1 +lt 1 +▁Vertrauen 1 +führen 1 +▁absolut 1 +▁Gericht 1 +sätze 1 +▁Inhalt 1 +genommen 1 +▁eingesetzt 1 +▁Punkte 1 +▁leisten 1 +able 1 +▁Park 1 +fo 1 +ix 1 +▁Höhe 1 +sprech 1 +▁Not 1 +▁unbe 1 +▁liebe 1 +▁Grenzen 1 +▁Fach 1 +▁Projekte 1 +bringen 1 +", 1 +▁bessere 1 +▁funktioniert 1 +▁Wi 1 +▁beste 1 +▁Wochen 1 +▁Test 1 +▁Klima 1 +▁Inter 1 +schä 1 +▁Ansatz 1 +▁bestimmten 1 +▁Änderungen 1 +tisch 1 +▁Schiff 1 +od 1 +punkt 1 +preis 1 +▁Bekämpfung 1 +sicherheit 1 +▁Beitritt 1 +▁Erklärung 1 +▁Auswahl 1 +▁Präsidentschaft 1 +▁Online 1 +stra 1 +▁solchen 1 +▁Gen 1 +▁Vereinbarung 1 +▁Bank 1 +fähig 1 +▁Versuch 1 +▁bringt 1 +▁größten 1 +▁Urlaub 1 +pass 1 +▁Fisch 1 +uell 1 +▁Italien 1 +▁lo 1 +▁Schw 1 +ku 1 +wie 1 +tern 1 +▁geworden 1 +▁on 1 +ity 1 +▁zehn 1 +▁früher 1 +▁Privat 1 +tor 1 +▁effektiv 1 +▁wichtiger 1 +▁gewährleisten 1 +▁dadurch 1 +plan 1 +▁vorhanden 1 +▁Haushalt 1 +änge 1 +▁Ru 1 +▁erneut 1 +ange 1 +▁Rede 1 +uß 1 +AN 1 +▁morgen 1 +▁eher 1 +ance 1 +▁jene 1 +▁hohen 1 +▁Strand 1 +zig 1 +▁verfügbar 1 +ank 1 +▁Gäste 1 +hä 1 +▁Insel 1 +▁l 1 +▁bi 1 +▁vorgeschlagen 1 +glich 1 +ba 1 +▁Kommissarin 1 +▁Natürlich 1 +laden 1 +▁automatisch 1 +▁Investitionen 1 +▁Zusammen 1 +IT 1 +buch 1 +▁Erachtens 1 +▁offen 1 +dig 1 +▁außerdem 1 +▁somit 1 +▁Außen 1 +Sch 1 +▁Kern 1 +▁Nur 1 +legt 1 +▁Heute 1 +out 1 +▁überhaupt 1 +▁Ga 1 +▁Ratspräsident 1 +▁2006 1 +ön 1 +▁Öl 1 +bare 1 +▁Pe 1 +▁Opfer 1 +ziehen 1 +▁erkennen 1 +▁Preise 1 +sverfahren 1 +▁vergangenen 1 +isiert 1 +▁Initiative 1 +▁vollständig 1 +▁genommen 1 +▁Ti 1 +ette 1 +press 1 +▁setzt 1 +▁Werte 1 +▁Viele 1 +▁Auffassung 1 +systeme 1 +sfähigkeit 1 +▁gefunden 1 +▁Car 1 +▁zahlreiche 1 +▁begann 1 +▁Armut 1 +barkeit 1 +ite 1 +▁vorgesehen 1 +▁Aufenthalt 1 +▁Sommer 1 +bot 1 +▁Nationen 1 +det 1 +stehen 1 +▁Sinne 1 +▁Dabei 1 +▁Aktivitäten 1 +cu 1 +IS 1 +alität 1 +▁erster 1 +ind 1 +▁Regeln 1 +▁Dis 1 +▁halte 1 +▁Führung 1 +▁suchen 1 +▁lernen 1 +▁behandelt 1 +▁Ressourcen 1 +ade 1 +eb 1 +▁beginnen 1 +stieg 1 +▁hervor 1 +amm 1 +® 1 +▁folgt 1 +▁ziehen 1 +▁vorgelegt 1 +▁aufge 1 +les 1 +▁do 1 +utz 1 +▁erwarten 1 +▁Frieden 1 +ult 1 +ock 1 +▁Verkehrs 1 +▁Gefühl 1 +tischen 1 +▁Lassen 1 +kom 1 +▁beide 1 +schi 1 +▁fand 1 +cke 1 +▁Warum 1 +▁festgelegt 1 +▁vergessen 1 +unternehmen 1 +▁Öffentlichkeit 1 +▁Bio 1 +▁länger 1 +ht 1 +▁Berlin 1 +lagen 1 +fach 1 +▁Basis 1 +ect 1 +▁Nord 1 +druck 1 +AR 1 +frei 1 +tan 1 +▁aufgenommen 1 +▁erfahren 1 +ice 1 +▁Folge 1 +merk 1 +fl 1 +ino 1 +▁verbessert 1 +▁Sitzung 1 +▁Beginn 1 +▁Besuch 1 +▁Neben 1 +tin 1 +▁alten 1 +wachsen 1 +ES 1 +Ge 1 +▁Organisation 1 +▁gelegen 1 +▁Ob 1 +▁Kapital 1 +▁arbeitet 1 +▁Dienst 1 +▁diejenigen 1 +▁September 1 +▁insgesamt 1 
+▁gewesen 1 +Ma 1 +mor 1 +ruf 1 +▁Vielen 1 +ker 1 +mun 1 +▁Jetzt 1 +▁Stellen 1 +richten 1 +▁co 1 +▁geschaffen 1 +stoff 1 +stände 1 +▁Tätigkeit 1 +ris 1 +ordnung 1 +▁fort 1 +10 1 +▁Mehr 1 +▁Port 1 +▁Bad 1 +▁ziemlich 1 +eil 1 +ya 1 +▁verantwortlich 1 +ssystem 1 +▁Standard 1 +service 1 +▁ruhig 1 +▁voran 1 +▁et 1 +trä 1 +▁angesicht 1 +▁Bu 1 +▁benötigen 1 +▁einschließlich 1 +▁Vorschriften 1 +▁tra 1 +▁weder 1 +tische 1 +▁Juli 1 +▁Transparenz 1 +▁moderne 1 +▁Dezember 1 +▁Ste 1 +▁Israel 1 +▁Küche 1 +▁nimmt 1 +▁benutzt 1 +▁kannst 1 +▁Ausbildung 1 +ließ 1 +▁eng 1 +▁kleiner 1 +▁Monaten 1 +▁richtige 1 +rate 1 +partner 1 +▁Hinsicht 1 +▁Staat 1 +▁erinnern 1 +▁Programme 1 +lose 1 +▁UN 1 +enz 1 +▁Cha 1 +▁sofort 1 +▁Tagesordnung 1 +▁aktiv 1 +▁schützen 1 +▁erfolgt 1 +▁dringend 1 +▁Pri 1 +rechte 1 +gebiet 1 +▁mag 1 +▁Bestimmungen 1 +tat 1 +▁erzielt 1 +tim 1 +▁ko 1 +▁11 1 +▁Republik 1 +▁Ä 1 +▁vertreten 1 +ett 1 +▁Hause 1 +schrift 1 +ender 1 +▁verändert 1 +▁Kom 1 +▁inter 1 +▁War 1 +▁Netzwerk 1 +hof 1 +tum 1 +▁Nacht 1 +▁Forschungs 1 +▁aufzu 1 +▁Dritt 1 +▁Vergangenheit 1 +▁po 1 +▁o 1 +▁leider 1 +▁Bereiche 1 +problem 1 +CH 1 +teile 1 +▁Einige 1 +▁bewusst 1 +▁Stabilität 1 +▁beitragen 1 +▁unge 1 +cker 1 +laufen 1 +▁Bemühungen 1 +mar 1 +▁zumindest 1 +RO 1 +wirken 1 +kop 1 +▁Klein 1 +▁000 1 +▁Firma 1 +▁täglich 1 +▁Or 1 +▁Gründen 1 +Die 1 +que 1 +nnen 1 +▁Gehirn 1 +▁Unternehmens 1 +▁wenige 1 +lin 1 +SE 1 +▁Kr 1 +▁2. 1 +▁Weiter 1 +erweise 1 +hält 1 +▁bezüglich 1 +▁Untersuchung 1 +▁europäischer 1 +▁kostenlos 1 +▁fragen 1 +▁gemäß 1 +daten 1 +▁Information 1 +trieb 1 +▁zunehmend 1 +gegangen 1 +▁Kompromiss 1 +▁erwartet 1 +▁Fällen 1 +RE 1 +▁Risiko 1 +▁Kar 1 +kal 1 +▁Vorsitz 1 +lauf 1 +▁Erst 1 +▁erlaubt 1 +▁Fu 1 +▁euch 1 +Fraktion 1 +lü 1 +▁Anforderungen 1 +verkehr 1 +▁Dokument 1 +richt 1 +▁Organ 1 +▁verhindern 1 +DE 1 +ände 1 +а 1 +▁Paket 1 +▁Post 1 +▁begrüße 1 +▁folgen 1 +▁Generation 1 +dienst 1 +▁si 1 +▁wesentlich 1 +▁Aufnahme 1 +▁Wieder 1 +▁unten 1 +▁Struktur 1 +▁Aspekte 1 +pol 1 +ring 1 +licher 1 +▁Bewegung 1 +schein 1 +▁Amerika 1 +ition 1 +werte 1 +▁Solidarität 1 +▁Alter 1 +▁versch 1 +alter 1 +▁gerecht 1 +fonds 1 +kehr 1 +е 1 +▁Fi 1 +▁Indien 1 +schluss 1 +kla 1 +lan 1 +▁Januar 1 +▁gr 1 +bahn 1 +▁West 1 +wasser 1 +▁Th 1 +▁Protokoll 1 +mä 1 +Schlusselwortern 1 +▁gezeigt 1 +▁spielt 1 +ationen 1 +▁Erde 1 +reichen 1 +▁Betrieb 1 +▁Ideen 1 +▁Spanien 1 +treten 1 +▁Zur 1 +▁veröffentlicht 1 +ica 1 +▁getroffen 1 +▁März 1 +▁Bro 1 +▁Anfrage 1 +▁Zweitens 1 +▁groß 1 +we 1 +▁Terrorismus 1 +▁60 1 +äre 1 +▁Fe 1 +tar 1 +AL 1 +▁me 1 +▁Einfluss 1 +▁gleiche 1 +▁benutzen 1 +▁Gi 1 +▁Ku 1 +▁Mutter 1 +ologische 1 +con 1 +▁sah 1 +▁Annahme 1 +▁Personal 1 +app 1 +wende 1 +▁Sy 1 +▁Gewinn 1 +▁entscheiden 1 +▁Q 1 +▁nennen 1 +▁perfekt 1 +▁einge 1 +▁Auftrag 1 +fragen 1 +bildung 1 +dien 1 +▁Wissenschaft 1 +▁Straf 1 +▁Gesetz 1 +▁Partei 1 +▁wider 1 +▁Herz 1 +▁Ist 1 +▁Technik 1 +▁entsprechend 1 +▁Plan 1 +▁Erfahrungen 1 +▁2005 1 +▁bald 1 +▁benötigt 1 +ost 1 +▁unmittelbar 1 +▁schlecht 1 +oren 1 +anten 1 +▁ernst 1 +ori 1 +▁Erstens 1 +▁unver 1 +▁Schi 1 +▁begrüßen 1 +01 1 +▁Schlüssel 1 +▁Flughafen 1 +zog 1 +▁Größe 1 +aktion 1 +stellt 1 +▁normal 1 +▁Straßen 1 +▁außerhalb 1 +▁Wunsch 1 +▁Webseite 1 +▁ne 1 +▁Vergleich 1 +stück 1 +fällig 1 +▁erfüllen 1 +9 1 +misch 1 +▁Boden 1 +▁Süd 1 +IN 1 +ges 1 +isse 1 +▁Japan 1 +sie 1 +ian 1 +19 1 +▁Transport 1 +▁lösen 1 +▁endlich 1 +▁Firmen 1 +sbereich 1 +bericht 1 +▁ausreichend 1 +jo 1 +▁heutigen 1 +for 1 +wesen 1 +nom 1 +▁Oktober 1 +▁aktuellen 1 +▁Hi 1 +▁globalen 1 +▁genießen 1 +atur 1 +▁EUR 1 +▁Bru 1 +▁Fest 1 +staat 1 +▁2004 1 
+ure 1 +▁fordern 1 +▁Kindern 1 +ys 1 +▁rein 1 +▁darstellt 1 +▁Aufgaben 1 +▁Monate 1 +▁Com 1 +▁Geist 1 +▁integriert 1 +▁Hoch 1 +▁eben 1 +üsse 1 +и 1 +▁Bürgerinnen 1 +park 1 +▁Bis 1 +▁Telefon 1 +▁Irak 1 +dauer 1 +▁Fernseh 1 +▁Wohn 1 +▁Märkte 1 +ano 1 +▁Wei 1 +entwicklung 1 +▁ha 1 +lö 1 +▁gewählt 1 +▁Patienten 1 +bank 1 +amp 1 +▁Su 1 +▁genutzt 1 +▁Kopf 1 +▁Meter 1 +▁überzeugt 1 +▁Objekt 1 +▁Osten 1 +è 1 +tten 1 +▁außer 1 +▁Verhalten 1 +▁stimmen 1 +▁del 1 +▁Nr 1 +▁Reaktion 1 +▁Botschaft 1 +▁Bas 1 +▁wählen 1 +nach 1 +▁Fortschritt 1 +▁Tagen 1 +▁dachte 1 +▁Sinn 1 +▁nachdem 1 +▁breite 1 +▁Tre 1 +▁Spieler 1 +▁ihres 1 +▁kaum 1 +▁Obwohl 1 +▁Vorstellung 1 +▁r 1 +▁gering 1 +uf 1 +▁weg 1 +gra 1 +leg 1 +ari 1 +▁niemand 1 +funktion 1 +ständig 1 +▁Verwaltung 1 +▁Hoffnung 1 +projekt 1 +í 1 +▁Schwierigkeiten 1 +▁trotz 1 +▁anderes 1 +▁geändert 1 +EL 1 +▁anderem 1 +reu 1 +burg 1 +▁ausschließlich 1 +betrieb 1 +▁genannten 1 +▁Option 1 +▁neues 1 +▁hören 1 +▁no 1 +▁Männer 1 +▁oben 1 +▁überall 1 +lieb 1 +▁Ski 1 +wechsel 1 +sprozess 1 +gar 1 +▁Universität 1 +fin 1 +▁Go 1 +▁Rechnung 1 +▁Künstler 1 +ations 1 +zustellen 1 +tru 1 +▁verpflichtet 1 +▁fa 1 +▁Angelegenheit 1 +▁nationale 1 +▁i 1 +▁Gar 1 +▁zusätzliche 1 +legung 1 +▁Karte 1 +▁Frühstück 1 +▁Tradition 1 +▁Präsidenten 1 +AS 1 +▁welches 1 +sitz 1 +▁Berichts 1 +leiten 1 +▁Pal 1 +leb 1 +schläge 1 +▁umfassende 1 +▁Anti 1 +▁globale 1 +▁International 1 +▁Aktion 1 +▁Bedürfnisse 1 +▁Gegen 1 +nden 1 +▁vorher 1 +▁Parteien 1 +▁berücksichtigt 1 +mü 1 +▁Gleich 1 +▁Stimme 1 +gesetzt 1 +essen 1 +▁Arbeitsplätze 1 +fern 1 +▁entsprechenden 1 +▁fr 1 +linie 1 +▁teil 1 +▁ju 1 +▁extrem 1 +▁wären 1 +kor 1 +▁Minister 1 +lit 1 +▁Verteidigung 1 +▁verstärkt 1 +▁Schritte 1 +▁offensichtlich 1 +▁Partnerschaft 1 +▁Deutsch 1 +▁Zeichen 1 +▁Mer 1 +gend 1 +ckt 1 +▁Durchführung 1 +▁Mitgliedstaat 1 +▁echte 1 +▁Bürgern 1 +▁zentrale 1 +zel 1 +▁beschlossen 1 +ai 1 +Q 1 +ionen 1 +ace 1 +▁junge 1 +▁gesetzt 1 +▁Fahrzeug 1 +üb 1 +zeichen 1 +▁Chance 1 +▁Monat 1 +▁Praxis 1 +▁Per 1 +▁offiziell 1 +▁80 1 +seitig 1 +▁gegenwärtig 1 +▁Pu 1 +▁eingehen 1 +▁entschieden 1 +▁Pol 1 +technik 1 +▁Stil 1 +▁En 1 +▁Golf 1 +ib 1 +arch 1 +zugehen 1 +▁gelten 1 +▁organisiert 1 +▁with 1 +▁erklären 1 +ON 1 +part 1 +▁Menschenrechts 1 +▁Si 1 +> 1 +ast 1 +ären 1 +▁Server 1 +▁22 1 +ial 1 +▁Klimawandel 1 +ction 1 +kan 1 +OR 1 +▁unbedingt 1 +ppe 1 +sicherung 1 +fallen 1 +ement 1 +führer 1 +▁vi 1 +▁bilden 1 +mark 1 +▁Abgeordnete 1 +▁AG 1 +ras 1 +▁Angelegenheiten 1 +▁nötig 1 +▁klare 1 +▁Datei 1 +▁Arten 1 +▁Freund 1 +messen 1 +liste 1 +▁danke 1 +ühl 1 +▁komme 1 +▁November 1 +▁Windows 1 +▁Bewertung 1 +▁Organisationen 1 +▁früh 1 +▁eingerichtet 1 +▁bedeuten 1 +▁übernehmen 1 +lung 1 +▁lesen 1 +▁Den 1 +▁Technologien 1 +▁erhöht 1 +reisen 1 +▁Amt 1 +▁Arm 1 +▁bestimmt 1 +macht 1 +fel 1 +fuhr 1 +▁Begriff 1 +▁Atmosphäre 1 +▁usw 1 +äl 1 +▁Fuß 1 +▁End 1 +▁gefährlich 1 +▁gesprochen 1 +EG 1 +wunder 1 +▁technische 1 +▁Pf 1 +▁alte 1 +▁jährlich 1 +▁Mitteilung 1 +standard 1 +▁näher 1 +ete 1 +▁Spa 1 +▁Liebe 1 +erei 1 +▁menschliche 1 +▁Einrichtung 1 +▁Volk 1 +▁pa 1 +statt 1 +▁Wann 1 +▁Mor 1 +▁dich 1 +▁Infrastruktur 1 +ek 1 +anischen 1 +line 1 +▁Vo 1 +technologie 1 +▁Schul 1 +sicher 1 +▁Bus 1 +▁Gipfel 1 +▁na 1 +▁Allerdings 1 +bereit 1 +pri 1 +▁verloren 1 +dition 1 +kü 1 +▁Völker 1 +▁Abend 1 +▁Tro 1 +▁möglicherweise 1 +▁2003 1 +▁streng 1 +▁sämtliche 1 +▁Kirche 1 +▁einzelne 1 +geht 1 +▁positive 1 +▁Bre 1 +▁Chi 1 +▁Aspekt 1 +н 1 +san 1 +▁Tages 1 +▁konkrete 1 +▁Regel 1 +tzt 1 +▁funktionieren 1 +par 1 +▁Material 1 +▁Dem 1 +vel 1 +▁Hinweis 1 +▁Feld 1 +bri 1 +▁bitten 
1 +hören 1 +▁erstellt 1 +▁Großbritannien 1 +▁Gründe 1 +▁Off 1 +▁technischen 1 +▁Analyse 1 +zz 1 +ight 1 +▁gesch 1 +▁finanzielle 1 +sprogramm 1 +mut 1 +▁Har 1 +hren 1 +▁Gott 1 +og 1 +▁Worte 1 +vers 1 +▁Jahrzehnt 1 +▁stets 1 +struktur 1 +wahl 1 +find 1 +sprache 1 +System 1 +▁23 1 +▁tätig 1 +gruppen 1 +▁Stück 1 +schaff 1 +▁entsprechende 1 +mitteln 1 +els 1 +▁Abschluss 1 +denken 1 +erstatterin 1 +▁Paris 1 +▁folgenden 1 +reise 1 +▁beteiligt 1 +▁Anstrengungen 1 +▁you 1 +▁persönlichen 1 +tage 1 +▁Dienste 1 +lement 1 +▁privaten 1 +▁Zustimmung 1 +▁aktuelle 1 +fehl 1 +▁entgegen 1 +▁Realität 1 +zentrum 1 +▁entstehen 1 +▁sage 1 +▁Griechenland 1 +▁modernen 1 +▁GmbH 1 +svorschriften 1 +▁Vorteile 1 +tiert 1 +sort 1 +▁jeweiligen 1 +zahlung 1 +▁rasch 1 +gl 1 +▁Haltung 1 +geb 1 +▁alt 1 +ative 1 +▁Angst 1 +▁negativ 1 +▁verändern 1 +▁April 1 +▁manchmal 1 +▁Überwachung 1 +ov 1 +▁Antrag 1 +▁Mein 1 +▁Waren 1 +▁Dazu 1 +▁geleistet 1 +pel 1 +getragen 1 +ili 1 +gg 1 +ile 1 +▁Wege 1 +val 1 +äh 1 +▁Schließlich 1 +▁Gast 1 +▁Stand 1 +etz 1 +hnt 1 +▁betrachtet 1 +▁folgende 1 +▁Geb 1 +▁berücksichtigen 1 +▁bewegen 1 +echt 1 +gesellschaft 1 +▁allgemeinen 1 +gesetz 1 +fälle 1 +behörde 1 +rück 1 +▁akzeptieren 1 +vent 1 +▁90 1 +▁Iran 1 +▁Hu 1 +gänge 1 +ational 1 +ros 1 +▁Garten 1 +artig 1 +losen 1 +▁Eltern 1 +▁acht 1 +▁Einstellung 1 +▁Start 1 +▁York 1 +ym 1 +▁200 1 +▁Au 1 +stoß 1 +▁qu 1 +aktiv 1 +▁App 1 +asi 1 +geber 1 +▁historischen 1 +▁Suche 1 +▁gefordert 1 +▁Verpflichtungen 1 +öl 1 +▁As 1 +▁stand 1 +eck 1 +▁Charakter 1 +▁Voll 1 +▁Ve 1 +▁Reformen 1 +▁Vielleicht 1 +▁Can 1 +▁nachhaltige 1 +▁starke 1 +dy 1 +▁derartige 1 +▁Mitte 1 +▁ausdrücklich 1 +▁Fran 1 +▁at 1 +▁ba 1 +▁Bri 1 +▁CO 1 +teilung 1 +gesch 1 +▁Wahlen 1 +▁entspricht 1 +▁Foto 1 +▁Tiere 1 +▁Sh 1 +▁Werkzeug 1 +▁zahlreichen 1 +▁Motor 1 +▁Tür 1 +red 1 +modell 1 +sser 1 +▁intensiv 1 +▁regelmäßig 1 +▁Banken 1 +▁Zweifel 1 +▁Schule 1 +▁Angriff 1 +▁Beweis 1 +▁künftig 1 +▁Ausgaben 1 +hu 1 +▁schön 1 +▁gewisse 1 +handlung 1 +ieß 1 +▁demokratischen 1 +▁Produktions 1 +hafte 1 +bre 1 +rich 1 +▁Anerkennung 1 +▁Mess 1 +Ich 1 +▁Kritik 1 +ting 1 +deck 1 +▁Sitz 1 +▁Zi 1 +griff 1 +▁dir 1 +▁Straße 1 +▁Tu 1 +gericht 1 +IC 1 +lon 1 +▁Kurs 1 +AT 1 +▁el 1 +▁Gas 1 +ott 1 +santrag 1 +region 1 +fassung 1 +oni 1 +kultur 1 +▁Zeitraum 1 +▁mindestens 1 +▁günstig 1 +▁Cor 1 +▁gekommen 1 +▁mi 1 +chi 1 +▁handeln 1 +▁Nutzen 1 +▁Zunächst 1 +▁zuvor 1 +▁speziell 1 +▁Anteil 1 +▁Komm 1 +▁militärische 1 +▁angesprochen 1 +▁Ausschuß 1 +▁Vielfalt 1 +oder 1 +▁Besucher 1 +▁gern 1 +▁hoffen 1 +▁Zug 1 +▁zudem 1 +▁Engagement 1 +▁Sonder 1 +▁musste 1 +▁Waffen 1 +maßnahmen 1 +▁Installation 1 +▁umgesetzt 1 +gruppe 1 +verfahren 1 +▁al 1 +ologie 1 +▁vermeiden 1 +▁Hintergrund 1 +zeichnen 1 +▁sicherzustellen 1 +swert 1 +▁Lehr 1 +▁Agentur 1 +▁For 1 +▁Stellungnahme 1 +▁betrachten 1 +kräfte 1 +5- 1 +lösung 1 +ekt 1 +▁seines 1 +▁nahm 1 +▁legen 1 +▁Ze 1 +tur 1 +ins 1 +leitung 1 +▁allgemeine 1 +wissen 1 +wick 1 +tung 1 +▁Kriterien 1 +▁Beratung 1 +▁Politiker 1 +?" 
1 +steigen 1 +mitglied 1 +ox 1 +▁Site 1 +▁Lang 1 +▁Glück 1 +schicht 1 +Aus 1 +versorgung 1 +▁konzentrieren 1 +▁ungefähr 1 +▁Tor 1 +schalt 1 +ding 1 +räu 1 +baren 1 +zent 1 +gebracht 1 +▁by 1 +würdig 1 +▁erweitert 1 +▁Ereignisse 1 +▁demokratische 1 +▁halb 1 +▁Holz 1 +▁persönlich 1 +▁Des 1 +▁Schaden 1 +▁erfüllt 1 +▁beziehen 1 +▁hinweisen 1 +ST 1 +zeichnung 1 +▁Jung 1 +▁Mag 1 +ration 1 +▁Militär 1 +▁etc 1 +▁ar 1 +▁Freunde 1 +▁voraus 1 +gal 1 +▁Netz 1 +▁Anpassung 1 +▁Falle 1 +▁Ausnahme 1 +▁Super 1 +▁schneller 1 +▁Eigen 1 +▁Unterkategorien 1 +prä 1 +▁deutschen 1 +▁Grunde 1 +▁sprach 1 +▁Falls 1 +▁Fähigkeit 1 +▁Mus 1 +van 1 +▁Link 1 +▁reden 1 +▁hart 1 +OS 1 +▁Gedanken 1 +▁Instrumente 1 +▁Eis 1 +are 1 +▁Mio 1 +▁miteinander 1 +▁verfolgt 1 +ßt 1 +▁Wind 1 +▁bestehenden 1 +schl 1 +▁Veränderungen 1 +▁laufen 1 +rum 1 +iss 1 +rad 1 +▁Meeres 1 +▁sicherlich 1 +▁Büro 1 +ath 1 +▁Kan 1 +▁Forderung 1 +▁Hersteller 1 +▁verlassen 1 +▁Autor 1 +krank 1 +▁Management 1 +▁Verkauf 1 +▁Kauf 1 +▁Wahrheit 1 +▁op 1 +▁finde 1 +win 1 +▁Teile 1 +▁Atom 1 +▁Mensch 1 +sol 1 +▁ergreifen 1 +träger 1 +▁Sonnen 1 +bring 1 +▁Sol 1 +▁Unterschied 1 +ther 1 +ps 1 +▁fahren 1 +rei 1 +position 1 +▁Kor 1 +Programm 1 +▁los 1 +▁niedrig 1 +▁Gold 1 +▁interessiert 1 +▁prüfen 1 +ore 1 +▁Chris 1 +▁einfache 1 +▁verursacht 1 +ffe 1 +▁zuletzt 1 +▁Rand 1 +▁London 1 +▁reich 1 +▁Leistung 1 +bus 1 +ink 1 +sinn 1 +stall 1 +tier 1 +▁Kredit 1 +▁Jeder 1 +eten 1 +▁jegliche 1 +▁gelangen 1 +rahmen 1 +▁komplexe 1 +angebot 1 +▁mehreren 1 +▁Vater 1 +▁Zwischen 1 +qua 1 +▁Einkommen 1 +▁Hel 1 +▁Roma 1 +▁27 1 +▁Stein 1 +▁Strom 1 +▁Sprachen 1 +▁Grundsatz 1 +▁Ihres 1 +▁Anspruch 1 +▁Prinzip 1 +station 1 +▁Texte 1 +▁mo 1 +▁typisch 1 +see 1 +▁Winter 1 +▁Aufbau 1 +▁Mon 1 +▁amerikanischen 1 +ski 1 +sucht 1 +bett 1 +▁geschützt 1 +fläche 1 +▁ernsthaft 1 +▁entweder 1 +▁Österreich 1 +▁finanziellen 1 +▁bedeutende 1 +▁schöne 1 +▁online 1 +▁Vereinte 1 +rau 1 +ual 1 +▁Tod 1 +kunden 1 +▁Mont 1 +Ein 1 +▁Gemeinde 1 +grenzen 1 +▁interessante 1 +wiesen 1 +▁Rad 1 +▁Absicht 1 +▁manche 1 +▁erzielen 1 +▁Titel 1 +krieg 1 +▁Ober 1 +▁anerkannt 1 +▁Signal 1 +gelassen 1 +▁Ost 1 +▁Inhalte 1 +▁Europäer 1 +▁Städte 1 +▁Kenntnis 1 +rang 1 +▁Trans 1 +US 1 +▁hält 1 +ini 1 +ffer 1 +▁erhöhen 1 +▁darunter 1 +▁darstellen 1 +▁Typ 1 +▁gewinnen 1 +Das 1 +▁Str 1 +▁interessant 1 +▁gegenseitig 1 +▁Prüfung 1 +▁stimmt 1 +▁daraus 1 +▁älter 1 +▁positiv 1 +stadt 1 +▁tut 1 +▁geschehen 1 +▁Polizei 1 +▁danach 1 +▁Aussicht 1 +mie 1 +▁dagegen 1 +▁Kooperation 1 +▁Pflicht 1 +▁Drogen 1 +▁Restaurants 1 +▁ch 1 +▁Globalisierung 1 +▁grenz 1 +▁Wirkung 1 +▁persönliche 1 +▁relativ 1 +▁Welche 1 +▁Nutzer 1 +▁2001 1 +▁ideal 1 +▁Systeme 1 +kur 1 +ndung 1 +▁Richtlinien 1 +▁Rest 1 +schul 1 +▁betonen 1 +pflicht 1 +inter 1 +▁Kommunikation 1 +▁optimal 1 +▁Landschaft 1 +▁Gesetzgebung 1 +▁70 1 +lern 1 +▁Regelung 1 +▁genauso 1 +ali 1 +freiheit 1 +▁regionale 1 +bin 1 +▁Vorbereitung 1 +ante 1 +▁wünschen 1 +▁2002 1 +volle 1 +weck 1 +stab 1 +▁großartig 1 +▁stattfinden 1 +preise 1 +geteilt 1 +▁bezeichnet 1 +vertrag 1 +▁Systems 1 +stehende 1 +▁♫ 1 +▁Wettbewerbs 1 +▁rechts 1 +▁innovative 1 +▁Sollte 1 +▁verfolgen 1 +act 1 +▁eingeführt 1 +machen 1 +▁stabil 1 +▁Angesicht 1 +▁damals 1 +geführt 1 +test 1 +▁freien 1 +mat 1 +lager 1 +tsch 1 +bürger 1 +▁angeboten 1 +▁su 1 +band 1 +▁Kategorie 1 +unk 1 +▁At 1 +▁Abschließend 1 +▁Akt 1 +haltung 1 +▁Band 1 +▁laut 1 +▁bestätigt 1 +▁Herstellung 1 +zo 1 +raten 1 +▁Kurz 1 +▁kommenden 1 +▁stärken 1 +ät 1 +▁welcher 1 +▁Gut 1 +▁erleben 1 +▁Zudem 1 +▁Kal 1 +▁Phase 1 +▁Teilnehmer 1 +▁sieben 1 +▁ho 1 +▁teilen 1 +▁angemessen 
1 +sländern 1 +▁Weil 1 +▁stimme 1 +▁Mu 1 +fest 1 +▁klein 1 +schuldig 1 +▁Zell 1 +▁Fonds 1 +kul 1 +▁angegeben 1 +▁Argument 1 +▁falls 1 +roh 1 ++ 1 +▁Bundes 1 +nes 1 +wahr 1 +flug 1 +▁Irland 1 +▁Entwurf 1 +▁Termin 1 +wir 1 +& 1 +▁Formen 1 +▁herum 1 +▁Sorge 1 +▁teilweise 1 +▁Homepage 1 +▁Kontroll 1 +60 1 +▁spezielle 1 +▁500 1 +stell 1 +wirkung 1 +hängen 1 +▁Tatsächlich 1 +▁Einigung 1 +▁Flugzeug 1 +▁Königreich 1 +wertung 1 +▁com 1 +▁Übersetzung 1 +fordern 1 +▁Friedens 1 +räume 1 +RA 1 +räumen 1 +wohn 1 +▁kaufen 1 +▁grundlegende 1 +▁Mädchen 1 +▁unglaublich 1 +richtung 1 +▁elektronische 1 +energie 1 +▁gegründet 1 +▁Mikro 1 +liegen 1 +▁dank 1 +▁angenehm 1 +bad 1 +▁Sam 1 +▁Blut 1 +▁ähnlich 1 +fu 1 +▁hinzu 1 +▁Dennoch 1 +hel 1 +sabkommen 1 +material 1 +▁Status 1 +▁garantiert 1 +▁übernachten 1 +Kon 1 +15 1 +▁Garantie 1 +iere 1 +▁Hälfte 1 +▁menschlichen 1 +alisierung 1 +▁1999 1 +sagen 1 +politischen 1 +▁Leider 1 +▁einsetzen 1 +▁Presse 1 +▁langsam 1 +▁Übergang 1 +▁Polen 1 +▁Ordnung 1 +▁angezeigt 1 +ille 1 +▁Studie 1 +▁besonderen 1 +orientiert 1 +oo 1 +▁Ratsvorsitz 1 +▁grosse 1 +▁zentral 1 +▁beginnt 1 +▁Kur 1 +Er 1 +▁Qua 1 +▁geschieht 1 +▁freuen 1 +▁dennoch 1 +vention 1 +▁Verständnis 1 +▁Bestandteil 1 +paket 1 +leistung 1 +ssig 1 +arten 1 +▁freue 1 +▁Ausland 1 +▁Kamera 1 +▁Gesicht 1 +ativ 1 +nie 1 +▁richtigen 1 +▁Tisch 1 +brechen 1 +▁Hände 1 +▁bauen 1 +▁wirksam 1 +falls 1 +▁Verpflichtung 1 +zähl 1 +▁Maschinen 1 +▁Hy 1 +▁Elemente 1 +dacht 1 +▁dritte 1 +▁Grundsätze 1 +▁dienen 1 +▁Multi 1 +▁Zahlen 1 +▁dritten 1 +prinzip 1 +Le 1 +spar 1 +▁trägt 1 +werfen 1 +99 1 +▁fallen 1 +▁Danke 1 +dro 1 +▁Ur 1 +ban 1 +▁August 1 +▁hotel 1 +▁Planeten 1 +▁Prioritäten 1 +pu 1 +▁Studien 1 +▁Einheit 1 +▁fühlen 1 +anlagen 1 +▁erscheint 1 +▁Oder 1 +kreis 1 +kurs 1 +▁zahlen 1 +▁übertragen 1 +▁lebt 1 +▁Initiativen 1 +▁Absatz 1 +eller 1 +▁größer 1 +▁Willen 1 +▁aufmerksam 1 +▁Schau 1 +han 1 +▁Einrichtungen 1 +▁Ausstellung 1 +by 1 +hotel 1 +produktion 1 +▁notwendigen 1 +▁links 1 +▁hochwertige 1 +dienste 1 +nimmt 1 +▁Red 1 +▁Papier 1 +rechts 1 +▁: 1 +▁feststellen 1 +▁Tour 1 +▁erstellen 1 +ehr 1 +viel 1 +▁humanitäre 1 +schuld 1 +wagen 1 +▁schlimm 1 +rus 1 +▁betroffen 1 +▁warten 1 +▁Februar 1 +typ 1 +hy 1 +ty 1 +▁profitieren 1 +hor 1 +▁Brüssel 1 +▁wollten 1 +▁einzigartige 1 +Beifall 1 +ehrt 1 +▁fordert 1 +rom 1 +king 1 +zieht 1 +▁Genau 1 +instrument 1 +systems 1 +wart 1 +bald 1 +▁Jede 1 +▁Miss 1 +▁Jahrhunderts 1 +upp 1 +▁Westen 1 +▁herzlich 1 +öpf 1 +▁Heil 1 +stoffe 1 +▁durchaus 1 +▁Air 1 +▁Museum 1 +▁nützlich 1 +▁zufrieden 1 +zugeben 1 +▁Verlust 1 +▁Grün 1 +chten 1 +bra 1 +▁Stärkung 1 +management 1 +digen 1 +war 1 +▁hingewiesen 1 +▁Ukraine 1 +▁beschäftigt 1 +▁Verwaltungs 1 +person 1 +▁sinnvoll 1 +▁interne 1 +▁sonst 1 +▁gewährleistet 1 +▁hervorragende 1 +bü 1 +▁Gebieten 1 +tral 1 +▁lokalen 1 +▁Innen 1 +▁entscheidend 1 +Sterne 1 +▁€ 1 +▁Dimension 1 +▁diskutieren 1 +▁meist 1 +weich 1 +▁vo 1 +▁Mindest 1 +gegen 1 +▁Grenz 1 +▁Sal 1 +▁umfasst 1 +ux 1 +geladen 1 +▁besuchen 1 +▁befürworte 1 +▁Agrar 1 +▁Pan 1 +▁vernünftig 1 +▁Ton 1 +q 1 +▁que 1 +▁Funktionen 1 +▁Spezial 1 +licht 1 +▁Beschreibung 1 +▁Besitz 1 +▁abgeschlossen 1 +▁erheblich 1 +politische 1 +Que 1 +pekt 1 +▁höhere 1 +von 1 +▁begrenzt 1 +ó 1 +nahm 1 +▁vorstellen 1 +▁Achtung 1 +▁erfolgen 1 +bindung 1 +▁Vorteil 1 +▁institutionelle 1 +▁größeren 1 +▁schreiben 1 +▁Sil 1 +Arbeitslosigkeit 1 +▁beinhaltet 1 +▁Villa 1 +how 1 +▁Ferien 1 +▁Tagung 1 +▁Club 1 +▁Wald 1 +roll 1 +▁jederzeit 1 +р 1 +laub 1 +▁Worten 1 +▁Risiken 1 +▁politischer 1 +▁vollkommen 1 +▁» 1 +“, 1 +▁fertig 1 +▁gewünscht 1 
+▁Umfang 1 +issen 1 +sto 1 +▁überzeugen 1 +willig 1 +▁gestalten 1 +izi 1 +dia 1 +▁Bal 1 +hlen 1 +▁Such 1 +▁verlieren 1 +▁à 1 +gung 1 +▁ließ 1 +▁Mini 1 +▁Beteiligung 1 +PS 1 +▁Einhaltung 1 +ächtig 1 +organisation 1 +ID 1 +▁wichtigste 1 +▁Essen 1 +12 1 +▁Zustand 1 +▁ad 1 +▁Lauf 1 +▁diskutiert 1 +▁Justiz 1 +▁Klasse 1 +fä 1 +▁Lesung 1 +▁umfangreiche 1 +▁Komfort 1 +LE 1 +▁zer 1 +▁Volks 1 +▁Nachrichten 1 +▁erhält 1 +häuser 1 +▁erfordert 1 +anische 1 +▁unternehmen 1 +▁Haut 1 +richtlinie 1 +▁Qualitäts 1 +▁Maße 1 +liefer 1 +▁Kein 1 +▁machte 1 +schnitt 1 +änderung 1 +▁kulturelle 1 +▁Eindruck 1 +▁CD 1 +mission 1 +aut 1 +▁deutsche 1 +tieren 1 +▁Wohnung 1 +▁PC 1 +▁Religion 1 +▁Dorf 1 +▁beliebt 1 +▁Sc 1 +▁höchste 1 +▁wenigen 1 +brauch 1 +▁Wissenschaftler 1 +geordnet 1 +tho 1 +▁ökologisch 1 +▁Regime 1 +AP 1 +▁unterstütze 1 +tion 1 +rö 1 +▁300 1 +▁hergestellt 1 +schrei 1 +▁Fotos 1 +▁Gegenteil 1 +zin 1 +kommt 1 +dit 1 +tragen 1 +▁Camp 1 +▁Wachstums 1 +▁Verkehr 1 +▁Priorität 1 +ani 1 +ood 1 +minister 1 +▁or 1 +▁Standort 1 +▁Handlung 1 +punkte 1 +▁Hafen 1 +▁fair 1 +zeiten 1 +low 1 +nov 1 +▁Bedarf 1 +▁ursprünglich 1 +text 1 +drücke 1 +▁Verträge 1 +▁Terrasse 1 +▁informieren 1 +▁Para 1 +karte 1 +▁König 1 +▁legt 1 +▁freundlich 1 +▁wachsende 1 +▁jungen 1 +ausschuss 1 +▁Betracht 1 +star 1 +pack 1 +▁Schluss 1 +▁Kontinent 1 +gli 1 +zusetzen 1 +▁Kräfte 1 +TE 1 +meister 1 +▁Rot 1 +west 1 +▁Krankheit 1 +gründe 1 +▁sehe 1 +▁historische 1 +ida 1 +bruch 1 +▁Business 1 +▁Barcelona 1 +▁Gerichtshof 1 +▁möglichst 1 +gebildet 1 +▁Patent 1 +▁Leistungen 1 +heiten 1 +▁zusätzlich 1 +grad 1 +▁einiger 1 +▁Produkten 1 +Ab 1 +04 1 +▁kürzlich 1 +▁wusste 1 +▁entdeckt 1 +▁Schwerpunkt 1 +▁Angaben 1 +▁Kosovo 1 +▁bequem 1 +▁Tur 1 +▁wunderbar 1 +▁Gemeinsamen 1 +▁wunderschöne 1 +02 1 +produkt 1 +▁Val 1 +▁kom 1 +▁großes 1 +▁überprüfen 1 +mil 1 +▁11. 1 +▁Treffen 1 +▁betont 1 +▁Dateien 1 +▁Zuständigkeit 1 +uk 1 +kenntnis 1 +staaten 1 +▁Pre 1 +▁kompliziert 1 +▁21 1 +RI 1 +ara 1 +▁Diskriminierung 1 +▁Niveau 1 +▁Eigentum 1 +bie 1 +▁Hauses 1 +organ 1 +information 1 +▁Pat 1 +ep 1 +▁weiterer 1 +▁Fin 1 +▁Pflanzen 1 +▁Flexibilität 1 +▁II 1 +▁schlechte 1 +▁Hin 1 +cha 1 +▁Boot 1 +klär 1 +▁Ruhe 1 +▁Meiner 1 +▁uner 1 +05 1 +▁Forderungen 1 +sagt 1 +▁hundert 1 +▁jeweils 1 +▁Export 1 +▁selten 1 +straße 1 +▁Gesetze 1 +▁Kompetenz 1 +zone 1 +zeichnet 1 +▁niemals 1 +▁Unterschiede 1 +▁Ursache 1 +▁Part 1 +▁Wander 1 +▁Morgen 1 +▁private 1 +▁Verbrechen 1 +▁übrigen 1 +▁Methode 1 +▁Landwirte 1 +▁Experten 1 +weil 1 +▁Datenbank 1 +▁Gleichzeitig 1 +▁Rund 1 +▁pr 1 +züge 1 +▁geplant 1 +ama 1 +▁Halb 1 +▁Einwanderung 1 +air 1 +▁Geschäft 1 +▁Studenten 1 +av 1 +▁sogenannte 1 +▁allgemein 1 +Hotel 1 +▁medizinische 1 +▁Zahlung 1 +Sp 1 +▁Lizenz 1 +pä 1 +ons 1 +wendung 1 +▁freie 1 +ausschusses 1 +▁Laufe 1 +▁gemeinschaft 1 +füge 1 +▁lokale 1 +▁Zugriff 1 +▁Liberalisierung 1 +▁bildet 1 +▁Lehrer 1 +tätigkeit 1 +▁endgültig 1 +▁geöffnet 1 +▁Schreib 1 +▁grundsätzlich 1 +▁verabschiedet 1 +▁schw 1 +т 1 +▁hauptsächlich 1 +dra 1 +lat 1 +bor 1 +märkte 1 +jährige 1 +▁reagieren 1 +tür 1 +▁einzigen 1 +stor 1 +▁Schweiz 1 +not 1 +▁höher 1 +▁Speicher 1 +Wir 1 +à 1 +gie 1 +stellungen 1 +nummer 1 +▁y 1 +▁Amsterdam 1 +AC 1 +▁gebaut 1 +▁Freude 1 +spe 1 +▁kulturellen 1 +tzen 1 +eucht 1 +▁Download 1 +▁Überzeugung 1 +2- 1 +MA 1 +▁3. 
1 +rand 1 +ull 1 +▁Streit 1 +▁zugänglich 1 +schloss 1 +▁jüngsten 1 +▁festgestellt 1 +weisung 1 +▁ausgewählt 1 +▁professionelle 1 +einheit 1 +▁Nahrungsmittel 1 +▁Standards 1 +▁Gegensatz 1 +▁Name 1 +▁WTO 1 +produkte 1 +lli 1 +gewalt 1 +▁Gebiete 1 +sbedingungen 1 +▁Koordinierung 1 +leih 1 +▁Beziehung 1 +▁regionalen 1 +▁High 1 +▁informiert 1 +▁wesentliche 1 +ata 1 +ehen 1 +▁fe 1 +▁Messe 1 +▁Marken 1 +vorschlag 1 +▁sauber 1 +▁gehalten 1 +▁Vielzahl 1 +▁solcher 1 +▁Fenster 1 +▁ideale 1 +▁Maß 1 +DA 1 +▁schafft 1 +gab 1 +way 1 +▁Überprüfung 1 +rät 1 +▁irgendeine 1 +▁externe 1 +▁Fo 1 +▁Debian 1 +▁behandeln 1 +▁definiert 1 +link 1 +▁Grundrechte 1 +▁Schulden 1 +lig 1 +ona 1 +▁Drei 1 +▁spricht 1 +▁Seine 1 +▁Letzt 1 +bedingungen 1 +▁britischen 1 +▁erforderlichen 1 +▁Architektur 1 +Li 1 +▁Krankheiten 1 +▁Bett 1 +▁gegenwärtigen 1 +yp 1 +▁Bedrohung 1 +prozess 1 +▁Pass 1 +▁Dia 1 +▁Wirklichkeit 1 +Al 1 +Geschäftsordnung 1 +suche 1 +▁anstatt 1 +oc 1 +möglichkeiten 1 +▁Adresse 1 +▁klicken 1 +ust 1 +qu 1 +▁Jugend 1 +▁akzeptiert 1 +▁ähnliche 1 +▁+ 1 +programme 1 +▁vorliegenden 1 +cc 1 +▁fällt 1 +▁schwarz 1 +▁Rom 1 +▁üblich 1 +▁schönen 1 +▁Arbeiten 1 +▁führte 1 +▁Best 1 +▁Ganz 1 +▁brachte 1 +▁Erklärungen 1 +▁Ei 1 +▁Reg 1 +▁20. 1 +▁Gewicht 1 +▁au 1 +▁Oberfläche 1 +ware 1 +cy 1 +▁« 1 +▁Verletzung 1 +▁Alternative 1 +▁abhängig 1 +pfen 1 +▁unternommen 1 +▁dient 1 +▁trotzdem 1 +gelegt 1 +ise 1 +mobil 1 +▁möglichen 1 +kapital 1 +▁as 1 +▁mussten 1 +▁not 1 +▁kamen 1 +TM 1 +heben 1 +forschung 1 +drückt 1 +mod 1 +FR 1 +CO 1 +▁Sand 1 +▁transparent 1 +▁Krebs 1 +▁Suite 1 +▁ergeben 1 +▁französischen 1 +▁amerikanische 1 +▁Perspektive 1 +▁Zentralbank 1 +▁Weitere 1 +teri 1 +▁empfehlen 1 +▁besitzt 1 +holen 1 +ces 1 +mus 1 +▁Mil 1 +▁Definition 1 +▁Google 1 +▁Berichte 1 +zellen 1 +▁Karten 1 +07 1 +geschäft 1 +▁eröffnet 1 +▁außerordentlich 1 +75 1 +Ö 1 +▁Kombination 1 +▁Beschäftigungs 1 +* 1 +▁Booking 1 +▁billig 1 +▁keinerlei 1 +treffen 1 +sport 1 +tour 1 +▁Unabhängigkeit 1 +▁Doppel 1 +schneid 1 +▁lautet 1 +▁room 1 +▁voller 1 +▁Erhöhung 1 +▁korrekt 1 +▁26 1 +AM 1 +▁Fälle 1 +tät 1 +▁öffentlich 1 +yl 1 +70 1 +wal 1 +▁teilnehmen 1 +grund 1 +▁Berufs 1 +▁Leuten 1 +durch 1 +▁Massen 1 +▁individuell 1 +▁Afghanistan 1 +gebühr 1 +▁Element 1 +▁schließen 1 +lor 1 +bas 1 +nah 1 +zy 1 +sorge 1 +▁eingereicht 1 +▁toll 1 +ular 1 +SA 1 +iziert 1 +▁gutes 1 +▁dieselbe 1 +11 1 +▁28 1 +▁Übereinkommen 1 +ziehung 1 +▁basiert 1 +hand 1 +▁begonnen 1 +spann 1 +▁internationaler 1 +▁reichen 1 +▁läuft 1 +▁erscheinen 1 +▁geschrieben 1 +▁wertvoll 1 +reif 1 +▁21. 1 +fund 1 +▁Glas 1 +▁entsprechen 1 +qualität 1 +в 1 +▁Branche 1 +▁verehrte 1 +▁individuelle 1 +▁Lager 1 +▁langfristig 1 +▁Gegend 1 +pon 1 +▁ausländische 1 +▁Identität 1 +figur 1 +tätig 1 +▁verbundenen 1 +▁Min 1 +▁enorme 1 +▁Zeiten 1 +▁pre 1 +fort 1 +▁Regelungen 1 +ED 1 +maß 1 +▁Beihilfen 1 +▁Arbeitsmarkt 1 +▁ausgewogen 1 +▁zählen 1 +▁Farben 1 +▁Bill 1 +präsent 1 +scheid 1 +frist 1 +▁reicht 1 +▁Gebrauch 1 +gebunden 1 +..." 
1 +▁guter 1 +▁vorschlagen 1 +▁erzählen 1 +▁angemessene 1 +IM 1 +jährigen 1 +▁gesetzlich 1 +▁speziellen 1 +▁entstanden 1 +ju 1 +wür 1 +▁glücklich 1 +▁Nein 1 +gut 1 +▁Erinnerung 1 +▁Flüchtlinge 1 +▁zuerst 1 +namen 1 +▁produziert 1 +fähigkeit 1 +mos 1 +▁politisch 1 +▁Tourismus 1 +▁Fähigkeiten 1 +{ 1 +▁erinnert 1 +▁Erwachsene 1 +▁from 1 +getreten 1 +rot 1 +ley 1 +Gästebewertungen 1 +isches 1 +ET 1 +▁Last 1 +▁bisschen 1 +▁Empfehlungen 1 +▁Prä 1 +▁wodurch 1 +▁grüne 1 +ball 1 +▁Feuer 1 +IP 1 +▁Geräte 1 +well 1 +gezogen 1 +▁verb 1 +▁Gemeinsam 1 +▁1998 1 +cial 1 +▁gesellschaft 1 +strahl 1 +'' 1 +▁Gerät 1 +▁seien 1 +▁Englisch 1 +▁Pl 1 +haben 1 +kauf 1 +IA 1 +▁Nachfrage 1 +▁Agenda 1 +kette 1 +▁Col 1 +▁Verhältnis 1 +▁Minderheiten 1 +tain 1 +MS 1 +▁Antworten 1 +▁Zentral 1 +▁geboren 1 +fahrzeug 1 +▁rück 1 +wirkt 1 +▁Code 1 +stanz 1 +vid 1 +▁Nachdem 1 +▁gelungen 1 +▁Aktionen 1 +▁TV 1 +▁Buchung 1 +▁Ebenso 1 +▁garantieren 1 +▁Sprach 1 +▁Ger 1 +wort 1 +▁langfristige 1 +▁Teilen 1 +▁sorgfältig 1 +▁Wohlstand 1 +▁General 1 +▁Anwendungen 1 +▁Asyl 1 +unt 1 +▁Betriebs 1 +▁langen 1 +▁treten 1 +ua 1 +▁wiederholt 1 +abel 1 +▁angeht 1 +▁Anlagen 1 +ita 1 +▁Schrift 1 +▁betroffenen 1 +leistungen 1 +▁unmöglich 1 +▁sagten 1 +kra 1 +plätze 1 +▁Problemen 1 +▁nationaler 1 +ax 1 +▁Dokumente 1 +ätz 1 +▁Regierungs 1 +▁gestern 1 +ringen 1 +smaßnahmen 1 +▁Veranstaltung 1 +▁Unterkunft 1 +▁Eigenschaften 1 +▁umgehen 1 +▁IT 1 +ij 1 +tic 1 +ré 1 +▁sodass 1 +▁Klicken 1 +▁Serie 1 +▁Gleichgewicht 1 +40 1 +▁herunter 1 +▁verbreitet 1 += 1 +uck 1 +▁sozial 1 +ence 1 +▁Stimm 1 +ebene 1 +▁Schüler 1 +▁Methoden 1 +istisch 1 +ül 1 +▁Bestellung 1 +▁dürfte 1 +▁Par 1 +▁Delegation 1 +▁vereinbart 1 +PL 1 +▁Fluss 1 +▁My 1 +▁Baby 1 +flu 1 +organisationen 1 +▁Symbol 1 +üß 1 +▁Asien 1 +▁Portugal 1 +top 1 +üchte 1 +▁einverstanden 1 +▁Teilnahme 1 +▁Sehr 1 +▁Stunde 1 +▁Fort 1 +aria 1 +reform 1 +▁Hall 1 +ada 1 +NE 1 +film 1 +▁Vision 1 +lution 1 +▁Hilfs 1 +Innen 1 +ture 1 +▁Austausch 1 +▁rot 1 +gas 1 +▁außergewöhnlich 1 +transport 1 +▁Stern 1 +▁Cu 1 +wehr 1 +▁digitale 1 +gesellschaften 1 +▁Amerikaner 1 +▁Sachen 1 +CE 1 +▁sicherstellen 1 +▁ro 1 +itter 1 +▁Glauben 1 +▁erzeugt 1 +▁Anlage 1 +▁Bildungs 1 +▁Mitteln 1 +SP 1 +▁Jugendliche 1 +▁erhebliche 1 +▁Denken 1 +▁dauerhaft 1 +▁Abschnitt 1 +▁erneuerbare 1 +▁vielmehr 1 +vis 1 +▁Mut 1 +▁geringer 1 +▁tat 1 +▁Ph 1 +▁wiederum 1 +▁ländlichen 1 +▁kurze 1 +▁Mir 1 +▁Wünsche 1 +▁höchst 1 +▁Zoll 1 +▁Entwicklungen 1 +verkehrs 1 +▁* 1 +▁kontextuell 1 +▁Voraussetzungen 1 +maschine 1 +suchen 1 +führt 1 +gerät 1 +direkt 1 +kontrolle 1 +▁Montag 1 +quelle 1 +▁Bücher 1 +▁klassische 1 +▁verstanden 1 +▁Niederlande 1 +▁entscheidende 1 +chel 1 +▁hilft 1 +pen 1 +Pla 1 +henswürdigkeiten 1 +Ch 1 +▁entdecken 1 +▁zugleich 1 +usch 1 +▁Krisen 1 +ish 1 +▁are 1 +▁Veränderung 1 +▁Beim 1 +neu 1 +▁Geschlecht 1 +▁Kilometer 1 +zung 1 +▁Zins 1 +zubringen 1 +▁andererseits 1 +▁Grad 1 +▁Effizienz 1 +▁Einklang 1 +gres 1 +▁ru 1 +cal 1 +▁vieler 1 +▁wesentlichen 1 +Pa 1 +▁CA 1 +▁Umständen 1 +▁vorbei 1 +▁traditionellen 1 +▁keiner 1 +▁Süden 1 +▁besseren 1 +schritt 1 +▁frisch 1 +▁verkauft 1 +EC 1 +03 1 +▁mittels 1 +▁Revolution 1 +▁positiven 1 +stä 1 +dri 1 +gelöst 1 +gelt 1 +▁abgestimmt 1 +▁öffnen 1 +▁35 1 +dreh 1 +▁Regulierung 1 +gro 1 +▁it 1 +entwurf 1 +▁spät 1 +partei 1 +▁Orte 1 +▁hört 1 +schiff 1 +post 1 +▁Einkaufs 1 +▁Grenze 1 +▁diesbezüglich 1 +▁Wohl 1 +▁Aussage 1 +▁Immobilien 1 +▁weiteres 1 +▁Muster 1 +gebiete 1 +▁Mitgliedern 1 +▁beachten 1 +▁religiöse 1 +illa 1 +vari 1 +▁Bedenken 1 +▁besucht 1 +isierte 1 +▁maximal 1 +ira 1 +▁Nieder 1 +▁trans 1 +Datei 1 
+▁gefördert 1 +LA 1 +▁www 1 +▁Versorgung 1 +▁namens 1 +▁mon 1 +TA 1 +▁Farb 1 +▁beschränkt 1 +ham 1 +▁Gal 1 +▁betreffen 1 +} 1 +▁seitens 1 +▁Gang 1 +▁gestaltet 1 +▁erlebt 1 +bro 1 +bauen 1 +ächte 1 +▁befassen 1 +▁Bel 1 +anda 1 +▁BIP 1 +Г 1 +ati 1 +▁illegale 1 +▁Anliegen 1 +▁Nahen 1 +▁Ter 1 +▁landwirtschaft 1 +▁Verbot 1 +▁Geschichten 1 +▁Planung 1 +▁Blog 1 +▁Internationalen 1 +▁Programms 1 +och 1 +▁weltweiten 1 +▁übernommen 1 +▁Schwarz 1 +▁Schweden 1 +Shop 1 +fol 1 +▁zuständig 1 +gezahlt 1 +▁Ausmaß 1 +▁wirksame 1 +▁schauen 1 +▁deiner 1 +▁Dort 1 +▁Beispiele 1 +▁Sanktionen 1 +▁beliebig 1 +14 1 +▁Job 1 +▁wann 1 +▁Neue 1 +tle 1 +▁Trotz 1 +▁erlauben 1 +▁Liefer 1 +▁herrliche 1 +cre 1 +car 1 +▁trifft 1 +▁les 1 +fisch 1 +65 1 +kar 1 +▁riesigen 1 +▁dynamisch 1 +▁Diskussionen 1 +▁gesunde 1 +▁derzeitigen 1 +LI 1 +▁ausgezeichnete 1 +▁leiden 1 +▁installiert 1 +▁Formular 1 +▁pu 1 +▁Gründung 1 +▁Mittelmeer 1 +▁Kollege 1 +▁gewonnen 1 +▁anbieten 1 +▁Tausende 1 +▁bedarf 1 +▁Redner 1 +▁entschlossen 1 +▁Institution 1 +tief 1 +▁Handeln 1 +▁vollständige 1 +▁Open 1 +▁Noch 1 +▁vorgeschlagenen 1 +▁Anreiz 1 +▁Datenschutz 1 +▁bereitgestellt 1 +krise 1 +rank 1 +▁Tief 1 +durchschnittlich 1 +▁erkannt 1 +▁Mangel 1 +▁grundlegenden 1 +▁dis 1 +▁Star 1 +pflanz 1 +▁griechische 1 +▁erhielt 1 +OL 1 +▁Gespräche 1 +▁Unternehmer 1 +▁höchsten 1 +cia 1 +▁nochmals 1 +▁Leistungs 1 +Veröffentlichung 1 +▁pri 1 +▁modern 1 +25 1 +▁Statistik 1 +▁World 1 +▁Modul 1 +▁kämpfen 1 +▁Fischer 1 +▁Vertrieb 1 +▁außen 1 +▁Linux 1 +▁Erwartungen 1 +▁Rumänien 1 +▁gelöst 1 +▁Wal 1 +▁Kol 1 +niveau 1 +vest 1 +▁konstruktiv 1 +▁Sonne 1 +▁erzählt 1 +▁sichtbar 1 +▁Ziffer 1 +▁Andere 1 +▁Erholung 1 +▁Einwohner 1 +pul 1 +48 1 +▁Links 1 +▁Effekt 1 +▁bekämpfen 1 +▁Medi 1 +▁Strategien 1 +oli 1 +▁Hol 1 +▁Kommunikations 1 +▁attraktiv 1 +▁Spiele 1 +dre 1 +▁Fraktionen 1 +pan 1 +atoren 1 +▁Ehe 1 +aufnahme 1 +nor 1 +▁Materialien 1 +▁Za 1 +▁Bra 1 +▁Leitlinien 1 +▁verlangt 1 +▁liefern 1 +▁siehe 1 +▁Direkt 1 +▁John 1 +▁bestimmen 1 +▁Char 1 +▁Sach 1 +▁Kaffee 1 +▁eigenes 1 +▁führenden 1 +tom 1 +▁Haushaltsplan 1 +körper 1 +▁Kann 1 +▁ausführlich 1 +▁verdient 1 +luft 1 +anlage 1 +▁Kandidat 1 +einrichtungen 1 +▁Temperatur 1 +ura 1 +▁Himmel 1 +▁ergriffen 1 +▁Regen 1 +▁Gesch 1 +austausch 1 +pre 1 +bit 1 +of 1 +▁gew 1 +▁schnelle 1 +gebung 1 +▁stellte 1 +▁Häuser 1 +▁erfolgreiche 1 +▁Reich 1 +geräte 1 +▁Mac 1 +▁Dingen 1 +mini 1 +▁Bauern 1 +▁komfortable 1 +▁Stärke 1 +▁besitzen 1 +▁konzentriert 1 +▁Voraussetzung 1 +▁verbinden 1 +▁Aufgrund 1 +▁Elektro 1 +▁Fläche 1 +lehr 1 +▁technologische 1 +▁Empfehlung 1 +▁zustimmen 1 +tech 1 +La 1 +▁konkreten 1 +▁sorgt 1 +98 1 +▁Wandel 1 +▁29 1 +▁Nummer 1 +08 1 +änder 1 +▁Fehl 1 +▁flexible 1 +überschreitende 1 +begriff 1 +▁kurzem 1 +▁Format 1 +forderung 1 +fra 1 +▁Jedes 1 +▁verlangen 1 +▁umzusetzen 1 +▁gemütliche 1 +▁anti 1 +▁gearbeitet 1 +▁annehmen 1 +▁Dadurch 1 +▁Strukturen 1 +▁Hinter 1 +▁Erzeugnisse 1 +▁Norden 1 +▁hast 1 +ella 1 +▁Güter 1 +▁bestehende 1 +ique 1 +▁Regional 1 +spezifisch 1 +▁türkische 1 +▁Praktik 1 +Emissionen 1 +geschlossen 1 +▁Autos 1 +bli 1 +л 1 +▁finanziert 1 +streit 1 +ping 1 +fällen 1 +isierten 1 +▁hoher 1 +Fi 1 +▁fühlt 1 +▁bezieht 1 +ico 1 +▁senden 1 +▁fragte 1 +▁chemische 1 +fried 1 +pat 1 +▁Wähler 1 +▁bezahlen 1 +PA 1 +tausend 1 +Ü 1 +▁Beruf 1 +Berücksichtigung 1 +▁zweifellos 1 +▁zeit 1 +▁umgeben 1 +▁Schatten 1 +▁Nachricht 1 +▁Fri 1 +▁Strukturfonds 1 +▁hängt 1 +code 1 +▁beobachten 1 +▁bekommt 1 +▁Publikum 1 +▁beschrieben 1 +▁Alles 1 +gemeinschaft 1 +▁Kreis 1 +ong 1 +geschrieben 1 +Pro 1 +▁welchen 1 +maschinen 1 +80 1 +▁Maßnahme 
1 +▁berechtigt 1 +fällt 1 +smöglichkeiten 1 +cho 1 +▁Wellness 1 +leit 1 +▁Medizin 1 +karten 1 +▁Institut 1 +▁gewährt 1 +miet 1 +▁angesehen 1 +▁lag 1 +bat 1 +▁Zeitung 1 +por 1 +▁Kontext 1 +▁Verhandlungs 1 +▁Del 1 +▁international 1 +▁Farbe 1 +▁wenden 1 +▁Führer 1 +▁Ohne 1 +▁Dafür 1 +▁Labor 1 +09 1 +verhalten 1 +▁Umfeld 1 +▁betreffend 1 +▁Werbe 1 +▁schwierigen 1 +▁pla 1 +plant 1 +▁fehlt 1 +▁willkommen 1 +▁eigen 1 +▁Schulen 1 +▁Vom 1 +erfolg 1 +breite 1 +IL 1 +▁City 1 +▁Beschluss 1 +▁Wesen 1 +▁weshalb 1 +▁zuverlässig 1 +effekt 1 +gno 1 +▁Studio 1 +▁Gestaltung 1 +wind 1 +▁ehemaligen 1 +▁Klimaanlage 1 +▁Mittelpunkt 1 +▁irische 1 +▁Zeug 1 +▁1997 1 +▁Milch 1 +▁kostenfrei 1 +▁kostet 1 +▁übrigens 1 +▁friedlich 1 +▁Allgemeinen 1 +old 1 +▁Korruption 1 +▁Auge 1 +▁existieren 1 +▁Küste 1 +▁britische 1 +▁bewegt 1 +ierbar 1 +Adresse 1 +▁ausgesetzt 1 +▁Tal 1 +▁Universum 1 +▁Theater 1 +▁Früh 1 +zuhalten 1 +für 1 +fü 1 +▁exklusiv 1 +▁Schuld 1 +▁einig 1 +Der 1 +▁Schönheit 1 +▁gezwungen 1 +immt 1 +schreiben 1 +▁Mode 1 +▁Cre 1 +▁traditionelle 1 +▁Denk 1 +▁offenen 1 +▁stolz 1 +itu 1 +▁Alt 1 +▁Geschwindigkeit 1 +▁Hund 1 +▁starken 1 +▁staatliche 1 +▁Verfügbarkeit 1 +▁eventuell 1 +▁Ferner 1 +▁Konzert 1 +▁Vermittlung 1 +▁Bucht 1 +▁bemüht 1 +▁massiv 1 +▁Würde 1 +▁Kommentar 1 +▁essen 1 +▁neun 1 +▁Comp 1 +▁Umweltschutz 1 +lief 1 +▁Sorgen 1 +▁vorgenommen 1 +▁sub 1 +▁plötzlich 1 +▁Normen 1 +▁ausgerichtet 1 +▁Beiträge 1 +▁Trend 1 +▁(1 1 +▁präsentiert 1 +situation 1 +geschichte 1 +▁Kapitel 1 +lock 1 +18 1 +▁Männern 1 +▁Umgang 1 +ält 1 +▁Harmonisierung 1 +▁existiert 1 +▁entlang 1 +▁Uni 1 +hang 1 +▁erhältlich 1 +▁Kongress 1 +▁Sub 1 +▁aufzunehmen 1 +▁innere 1 +▁verurteilt 1 +▁Prinzipien 1 +▁Mobilität 1 +▁Traum 1 +▁einfacher 1 +▁Ag 1 +▁Microsoft 1 +▁ehrlich 1 +pläne 1 +▁richten 1 +▁Kapazität 1 +gerecht 1 +▁Stadtzentrum 1 +EM 1 +▁heutige 1 +bert 1 +▁gezogen 1 +▁achten 1 +▁150 1 +▁hab 1 +▁Heimat 1 +rseits 1 +poli 1 +▁Stellung 1 +▁Konsens 1 +▁Handy 1 +Rezeption 1 +▁abgelehnt 1 +▁reduziert 1 +▁Anbieter 1 +▁investieren 1 +▁Fleisch 1 +▁zählt 1 +▁Dach 1 +leute 1 +regelung 1 +▁vielfältig 1 +amerikanische 1 +▁entwickelten 1 +▁breit 1 +▁Wichtig 1 +sländer 1 +decken 1 +MO 1 +schreiten 1 +urteil 1 +rechnen 1 +▁Brasilien 1 +▁gespeichert 1 +▁This 1 +▁verkaufen 1 +Kom 1 +▁inzwischen 1 +▁egal 1 +▁afrikanische 1 +▁aktive 1 +▁verringern 1 +▁spezifische 1 +▁Radio 1 +gekommen 1 +▁glaubt 1 +34 1 +stimme 1 +▁Tim 1 +▁echten 1 +bewegung 1 +▁Plattform 1 +▁Grand 1 +с 1 +▁Eisenbahn 1 +▁kreativ 1 +▁Herkunft 1 +zukommen 1 +anti 1 +zehn 1 +▁Dauer 1 +▁Vorgehen 1 +▁beraten 1 +▁Übereinstimmung 1 +Ro 1 +stimmung 1 +▁Verbesserungen 1 +▁Mark 1 +▁beträchtlich 1 +▁Viertel 1 +▁Altstadt 1 +verbrauch 1 +▁umfassenden 1 +lett 1 +▁Operation 1 +▁Real 1 +stätte 1 +▁Drittens 1 +▁Maschine 1 +▁emp 1 +▁wert 1 +geld 1 +tschaftswachstum 1 +▁Geschäfte 1 +▁französische 1 +dank 1 +▁sterben 1 +▁Festlegung 1 +▁Ausführung 1 +▁hinein 1 +▁Widerstand 1 +Mobil 1 +▁durchzuführen 1 +PT 1 +▁$ 1 +▁mod 1 +raub 1 +▁Besorgnis 1 +▁Leid 1 +▁mögen 1 +uc 1 +▁begrüßt 1 +TV 1 +finden 1 +▁Veranstaltungen 1 +@ 1 +freundliche 1 +The 1 +▁letzter 1 +verwaltung 1 +▁sichern 1 +▁offene 1 +▁NATO 1 +▁vorbereitet 1 +▁fordere 1 +▁Zahlungs 1 +▁Original 1 +▁Gebühr 1 +▁Top 1 +▁verbringen 1 +▁Agrarpolitik 1 +▁aufnehmen 1 +flo 1 +▁Gefängnis 1 +▁1996 1 +▁Schäden 1 +▁berühmten 1 +▁bestand 1 +Tech 1 +▁Update 1 +▁bekam 1 +▁unterzeichnet 1 +▁UNO 1 +▁Ol 1 +° 1 +▁sofern 1 +▁nieder 1 +▁italienischen 1 +datei 1 +▁eure 1 +▁einerseits 1 +So 1 +▁betreffenden 1 +▁Aktien 1 +sprach 1 +kunft 1 +▁gerichtet 1 +version 1 +▁Urteil 1 
+▁verleihen 1 +▁chinesischen 1 +▁dahin 1 +24 1 +fahrer 1 +▁starten 1 +64 1 +▁Inseln 1 +▁Arbeitsplatz 1 +▁Opposition 1 +▁entsteht 1 +▁Experiment 1 +▁Spannung 1 +▁westlichen 1 +▁anwesend 1 +38 1 +▁hinweg 1 +▁Fern 1 +nehmer 1 +▁verhindert 1 +▁Frist 1 +▁Kreditkarte 1 +▁Angebote 1 +▁Hauptstadt 1 +uliert 1 +menge 1 +▁Details 1 +▁Front 1 +▁Freizeit 1 +▁beträgt 1 +95 1 +▁detailliert 1 +▁kontrolliert 1 +▁südlich 1 +▁IN 1 +▁Emp 1 +zieren 1 +▁riesige 1 +▁Verbreitung 1 +▁Verringerung 1 +▁Übernachtung 1 +▁durchführen 1 +▁Deck 1 +wächst 1 +▁Tool 1 +fragt 1 +▁Stimmen 1 +bestimmung 1 +▁Genehmigung 1 +▁geprüft 1 +▁irgendwie 1 +▁tausend 1 +▁Zivil 1 +▁industrielle 1 +▁künftigen 1 +alismus 1 +▁einheitliche 1 +ih 1 +web 1 +к 1 +▁gelegt 1 +▁Mari 1 +▁Städten 1 +▁behalten 1 +xi 1 +▁amtierende 1 +▁Center 1 +▁enorm 1 +pot 1 +▁Roboter 1 +▁staatlichen 1 +▁gewissen 1 +▁Verein 1 +ctor 1 +fremd 1 +▁Grafik 1 +▁Werbung 1 +▁Marketing 1 +lösen 1 +▁strategische 1 +ami 1 +▁Verantwortlich 1 +▁gefragt 1 +▁Chancen 1 +▁geehrte 1 +cro 1 +▁Speise 1 +ME 1 +▁ehrgeizig 1 +▁sonstige 1 +▁Faktoren 1 +▁Touristen 1 +▁Stamm 1 +hung 1 +▁Haftung 1 +▁passieren 1 +▁schicken 1 +ato 1 +abkommen 1 +▁Satz 1 +▁Stahl 1 +wald 1 +▁hervorragend 1 +ont 1 +▁Bewusstsein 1 +basierte 1 +ola 1 +ativen 1 +▁Mc 1 +avi 1 +phase 1 +quote 1 +▁Kat 1 +füll 1 +▁ursprünglichen 1 +▁nachhaltigen 1 +▁uneingeschränkt 1 +wohl 1 +▁Ursprung 1 +▁Ehre 1 +▁beruht 1 +▁reduzieren 1 +▁Brief 1 +greif 1 +▁Menü 1 +schlüssel 1 +▁hielt 1 +Projekt 1 +▁Ernährung 1 +▁produzieren 1 +▁hinzufügen 1 +▁Drittländern 1 +▁zusätzlichen 1 +▁Intervention 1 +hebung 1 +play 1 +▁Solar 1 +▁auftreten 1 +ierende 1 +▁Journalist 1 +▁doppelt 1 +▁letztendlich 1 +verbindung 1 +▁schwach 1 +▁besch 1 +▁= 1 +▁Paul 1 +▁Zusatz 1 +ulation 1 +▁para 1 +cio 1 +kontroll 1 +▁letztlich 1 +▁schrieb 1 +östlich 1 +▁effiziente 1 +▁erzeugen 1 +IE 1 +▁ausgesprochen 1 +▁Ausführungen 1 +▁heran 1 +phi 1 +▁zugun 1 +tausch 1 +kunde 1 +37 1 +▁aufgefordert 1 +format 1 +▁niedriger 1 +▁untersucht 1 +▁verteilt 1 +kleid 1 +ierenden 1 +lip 1 +▁überein 1 +▁DVD 1 +▁Sein 1 +Mo 1 +▁Terror 1 +falt 1 +abend 1 +▁gelernt 1 +▁nutzt 1 +▁solange 1 +▁zurückzu 1 +vol 1 +▁Sicherung 1 +auto 1 +ñ 1 +▁Plenum 1 +ström 1 +▁sur 1 +▁bemühen 1 +▁Bahnhof 1 +▁bezug 1 +▁ausgeführt 1 +din 1 +ging 1 +▁hervorheben 1 +▁Lernen 1 +▁Spaß 1 +▁Ganze 1 +lesen 1 +▁Inflation 1 +fluss 1 +▁bezahlt 1 +▁Fitness 1 +▁Spar 1 +▁Gespräch 1 +▁Browser 1 +▁angepasst 1 +▁Respekt 1 +▁Beste 1 +▁Stock 1 +▁Volkswirtschaft 1 +ical 1 +96 1 +▁effizient 1 +äumt 1 +verletzung 1 +DER 1 +▁Marke 1 +▁2020 1 +▁Anstieg 1 +▁beschäftigen 1 +▁spanischen 1 +▁Steuern 1 +▁Kranken 1 +▁Tochter 1 +▁gratulieren 1 +▁dauert 1 +ifiziert 1 +schluß 1 +▁erleichtern 1 +Bahn 1 +▁Menschheit 1 +▁Kunde 1 +▁beglückwünschen 1 +▁Zucker 1 +pal 1 +▁palästinensisch 1 +▁bestätigen 1 +▁jemals 1 +▁führende 1 +▁Strecke 1 +GE 1 +▁renoviert 1 +▁freiwillig 1 +stil 1 +antwort 1 +▁Digital 1 +▁Schlafzimmer 1 +gin 1 +▁geprägt 1 +97 1 +center 1 +▁wiederholen 1 +▁zerstört 1 +▁untersuchen 1 +▁Hor 1 +kret 1 +š 1 +läufe 1 +▁Todes 1 +▁geistige 1 +team 1 +▁zivil 1 +▁Stoffe 1 +schütt 1 +ç 1 +▁Zusammenhalt 1 +▁Tele 1 +gramm 1 +zü 1 +UR 1 +Lo 1 +▁neuesten 1 +▁Will 1 +▁herrscht 1 +komp 1 +17 1 +▁Forum 1 +▁Fünf 1 +▁berufliche 1 +▁soweit 1 +▁Kohle 1 +Bo 1 +▁Appartement 1 +▁Deswegen 1 +▁entspannen 1 +rühr 1 +▁zuständigen 1 +▁Bulgarien 1 +▁Räume 1 +▁Küsten 1 +▁aussehen 1 +Strategie 1 +▁hand 1 +▁zufolge 1 +▁Detail 1 +bal 1 +▁KMU 1 +▁knapp 1 +▁enger 1 +▁Sohn 1 +м 1 +▁unverzüglich 1 +anstalt 1 +▁Sind 1 +▁Beschlüsse 1 +▁Freunden 1 +16 1 +lohn 1 +▁Mission 1 
+▁illegal 1 +▁Apartments 1 +▁Vorschlägen 1 +▁Mandat 1 +▁Bibliothek 1 +▁miss 1 +▁Gästen 1 +▁400 1 +▁Geschmack 1 +▁parlamentarisch 1 +▁kontrollieren 1 +▁Pläne 1 +49 1 +ado 1 +liegenden 1 +▁Hohen 1 +▁englische 1 +▁liefert 1 +beauftragte 1 +▁jener 1 +▁Cap 1 +▁Pakistan 1 +▁Audio 1 +▁Anfragen 1 +▁Metall 1 +▁Schlag 1 +▁Sehen 1 +▁scheinen 1 +amt 1 +ivität 1 +lenk 1 +▁israelisch 1 +fährt 1 +▁Beamte 1 +▁Nachbarn 1 +konferenz 1 +▁Event 1 +füllt 1 +▁Sicher 1 +Tra 1 +85 1 +▁Arbeitsplätzen 1 +▁Renn 1 +wärts 1 +reicht 1 +▁Pop 1 +▁Gerechtigkeit 1 +▁fein 1 +europa 1 +assi 1 +▁Gesetzes 1 +▁beteiligen 1 +dor 1 +▁Ferienwohnung 1 +▁Rechtsgrundlage 1 +▁nahezu 1 +grün 1 +gibt 1 +skonferenz 1 +▁verschiedener 1 +NS 1 +▁Defizit 1 +▁Aktionsplan 1 +▁ersetzt 1 +▁Sä 1 +▁offenbar 1 +▁gefährdet 1 +▁getrennt 1 +Di 1 +branche 1 +▁par 1 +▁Kabel 1 +▁qualifizierte 1 +auch 1 +▁höheren 1 +▁kurzfristig 1 +▁bilaterale 1 +▁Akteure 1 +ermaßen 1 +▁Ausgabe 1 +cor 1 +▁Aktiv 1 +▁32 1 +Ar 1 +▁ferner 1 +▁anschließend 1 +▁Potenzial 1 +▁weitgehend 1 +▁Martin 1 +▁Live 1 +▁Abfall 1 +▁Bemerkungen 1 +▁sucht 1 +06 1 +▁umfassend 1 +azi 1 +▁Angel 1 +▁Verarbeitung 1 +▁spreche 1 +ruhe 1 +▁ausgeschlossen 1 +▁gedacht 1 +▁erstaunlich 1 +▁schwedische 1 +▁Wien 1 +▁verdienen 1 +▁Costa 1 +▁Lieferung 1 +▁Nahrung 1 +▁elegante 1 +▁Sha 1 +▁Training 1 +olo 1 +▁moralische 1 +▁zwingen 1 +meldung 1 +ward 1 +▁konkret 1 +ident 1 +39 1 +▁Zypern 1 +▁Ausstattung 1 +▁Automobil 1 +option 1 +iff 1 +steck 1 +TO 1 +gewinn 1 +▁konsequent 1 +▁Haar 1 +▁ändert 1 +günstig 1 +▁spezialisiert 1 +▁Konsequenzen 1 +▁Mrd 1 +SL 1 +▁Renten 1 +IR 1 +▁Rohstoff 1 +▁Anzeige 1 +table 1 +freund 1 +trifft 1 +▁Drittel 1 +▁Ozean 1 +Vi 1 +▁Einschränkung 1 +▁Kroatien 1 +schieben 1 +▁Eingang 1 +▁blieb 1 +förder 1 +▁beendet 1 +▁Großteil 1 +mail 1 +▁sexuelle 1 +konzept 1 +▁jenen 1 +68 1 +▁Existenz 1 +▁Gepäck 1 +▁Einbeziehung 1 +▁verstärken 1 +zunehmen 1 +▁van 1 +▁spezifischen 1 +▁Konvent 1 +▁Tru 1 +zusammenarbeiten 1 +▁Sammlung 1 +▁Entlastung 1 +▁Armee 1 +▁Unterhaltung 1 +läuft 1 +▁geäußert 1 +▁zugrunde 1 +▁Sex 1 +▁Außenpolitik 1 +▁ansehen 1 +▁Bur 1 +▁Darum 1 +▁Konzern 1 +▁unterscheiden 1 +▁Mach 1 +55 1 +▁qualitativ 1 +fertig 1 +▁Tauch 1 +tüm 1 +zusehen 1 +▁schlägt 1 +▁Obama 1 +▁Augenblick 1 +spräsident 1 +▁Bereitstellung 1 +▁Erlebnis 1 +dar 1 +▁kontinuierlich 1 +▁generell 1 +▁gültig 1 +institut 1 +▁schlagen 1 +fahr 1 +personal 1 +▁Plat 1 +lot 1 +▁Weiß 1 +▁Pra 1 +44 1 +▁Kampagne 1 +▁Brücke 1 +put 1 +▁Bush 1 +▁Bahn 1 +▁schrecklich 1 +▁Gleichstellung 1 +▁dargestellt 1 +à 1 +▁1990 1 +▁Power 1 +▁ausgezeichneten 1 +▁alternative 1 +Euro 1 +HO 1 +hilf 1 +steig 1 +▁Beach 1 +▁Geburt 1 +▁beigetragen 1 +▁Va 1 +▁Kreuz 1 +waffen 1 +▁beantworten 1 +▁Öko 1 +▁Dich 1 +▁TED 1 +schätze 1 +bedingt 1 +▁Balkan 1 +▁Medikamente 1 +▁Wild 1 +▁multi 1 +▁Kanada 1 +Vor 1 +Ja 1 +▁Erfüllung 1 +▁sitzen 1 +▁Bord 1 +strategie 1 +▁Maus 1 +bla 1 +▁Reduzierung 1 +▁Bühne 1 +▁Zusätzlich 1 +check 1 +▁Nachbarschaft 1 +▁Theorie 1 +▁sozialer 1 +▁Weltkrieg 1 +bio 1 +▁Ausgangspunkt 1 +▁Kopenhagen 1 +▁Widerspruch 1 +▁Gewährleistung 1 +wandel 1 +ografische 1 +▁Phänomen 1 +▁Casino 1 +▁zukünftige 1 +▁Ruf 1 +▁mächtig 1 +▁Wett 1 +▁Ersatz 1 +▁realistisch 1 +CD 1 +mul 1 +▁geltenden 1 +▁verbindlich 1 +▁Islam 1 +▁arabische 1 +▁behaupten 1 +▁nachdenken 1 +▁Bildschirm 1 +▁normalerweise 1 +Länder 1 +sache 1 +turm 1 +¤ 1 +▁verteidigen 1 +▁1995 1 +Was 1 +▁Verlauf 1 +flüge 1 +mensch 1 +▁relevant 1 +losigkeit 1 +▁Hochschul 1 +▁Summe 1 +▁Staates 1 +heil 1 +▁Provinz 1 +▁systematisch 1 +ji 1 +▁Kenn 1 +▁Insbesondere 1 +▁Schengen 1 +▁Gute 1 +▁russischen 1 +▁Können 1 
+▁dramatisch 1 +büro 1 +▁Wand 1 +▁berühmte 1 +▁Wechsel 1 +▁potenziell 1 +▁global 1 +▁page 1 +▁Charta 1 +▁Übrigen 1 +▁leichter 1 +▁genügend 1 +▁Besonders 1 +▁ordnungsgemäß 1 +▁schönsten 1 +▁that 1 +▁Autobahn 1 +▁Katastrophe 1 +kampf 1 +▁chinesische 1 +▁traf 1 +▁ergänzen 1 +▁Ungarn 1 +▁gespielt 1 +▁Lä 1 +▁lieber 1 +▁Glaubwürdigkeit 1 +▁übernimmt 1 +▁Kap 1 +IF 1 +▁Free 1 +▁Badezimmer 1 +▁eingegangen 1 +▁geboten 1 +▁genetisch 1 +leiter 1 +▁Schalt 1 +▁beeinflussen 1 +cur 1 +▁my 1 +beziehungen 1 +▁45 1 +geschickt 1 +▁comp 1 +▁entscheidender 1 +▁bloß 1 +▁organisieren 1 +▁vermitteln 1 +▁äußern 1 +▁hoffentlich 1 +▁Marktes 1 +design 1 +▁Laut 1 +▁Ärzte 1 +▁betreiben 1 +heiß 1 +graben 1 +▁rechtlichen 1 +▁beeindruckend 1 +▁präzise 1 +▁angewandt 1 +media 1 +▁Finanzkrise 1 +▁Klar 1 +lieferung 1 +block 1 +▁Hostels 1 +eisen 1 +gängig 1 +läufig 1 +▁Verteilung 1 +▁Anlass 1 +▁Steuerung 1 +▁Öffnung 1 +▁Pension 1 +▁gewiss 1 +▁eingebracht 1 +▁2011 1 +trauen 1 +▁Netze 1 +▁Treib 1 +▁Darstellung 1 +▁Schwimm 1 +▁Tabelle 1 +▁Schlacht 1 +86 1 +▁Abhängigkeit 1 +▁München 1 +ón 1 +▁Gegenstand 1 +▁ersetzen 1 +größe 1 +▁Frankfurt 1 +▁Subsidiarität 1 +▁hell 1 +▁Bemerkung 1 +NA 1 +▁bislang 1 +prüfung 1 +13 1 +▁froh 1 +▁vorlegen 1 +▁mittleren 1 +▁Leitung 1 +▁installieren 1 +▁Schloss 1 +▁vorherige 1 +name 1 +▁Eurozone 1 +▁Stufe 1 +▁Betrag 1 +▁Beschränkung 1 +▁selben 1 +▁Fla 1 +pool 1 +▁überprüft 1 +▁rechtliche 1 +▁Flüge 1 +Distribution 1 +▁verbindet 1 +▁ethnische 1 +erklärung 1 +▁Versammlung 1 +▁gehe 1 +ausschuß 1 +▁Haben 1 +graph 1 +▁Pflege 1 +oma 1 +▁Kra 1 +klasse 1 +▁Rechnungshof 1 +▁gebeten 1 +▁Komponenten 1 +▁IP 1 +▁Vorhaben 1 +▁Satelliten 1 +▁sammeln 1 +möglich 1 +lässlich 1 +» 1 +For 1 +▁einheitlichen 1 +arbeiter 1 +abhängig 1 +▁deswegen 1 +Ä 1 +▁Faktor 1 +д 1 +▁Wörter 1 +▁Bearbeitung 1 +88 1 +▁ansprechen 1 +▁konfrontiert 1 +▁Wälder 1 +▁Trotzdem 1 +▁ergibt 1 +▁Oper 1 +▁Schlaf 1 +▁Migration 1 +regen 1 +elf 1 +▁dankbar 1 +▁allzu 1 +▁Befugnisse 1 +▁ehemalige 1 +▁serviert 1 +wähl 1 +▁Van 1 +▁berichtet 1 +geist 1 +▁Seele 1 +▁ländliche 1 +ible 1 +zentren 1 +43 1 +▁Import 1 +viertel 1 +# 1 +▁kümmern 1 +▁Tabak 1 +▁Pool 1 +offen 1 +▁Kopie 1 +▁Übertragung 1 +rechnet 1 +▁Wahrscheinlich 1 +▁vorrangig 1 +einkommen 1 +bund 1 +▁Investitions 1 +europäischen 1 +ruhig 1 +▁momentan 1 +▁Belastung 1 +▁versichern 1 +Par 1 +Zu 1 +samt 1 +▁erreichbar 1 +verhältnis 1 +SI 1 +▁Dusche 1 +▁Sto 1 +DI 1 +▁Registrierung 1 +▁Wiederaufbau 1 +grenze 1 +char 1 +▁reagiert 1 +4-0 1 +▁aktuell 1 +▁Av 1 +▁Luxus 1 +▁Regierungschefs 1 +geworfen 1 +▁Photo 1 +▁überwinden 1 +ava 1 +▁aufbauen 1 +▁Ägypten 1 +versicherung 1 +speicher 1 +▁Wetter 1 +prüf 1 +anta 1 +▁könnt 1 +▁Kollegin 1 +▁mobile 1 +▁Quellen 1 +▁aussprechen 1 +geschlagen 1 +ektor 1 +▁besorgt 1 +▁nett 1 +▁betrieben 1 +▁gewöhnlich 1 +▁hilfreich 1 +▁Fußball 1 +▁Sekunden 1 +ated 1 +▁Fremd 1 +▁EZB 1 +▁AKP 1 +▁this 1 +▁zukünftigen 1 +▁Bereitschaft 1 +▁fantastisch 1 +leib 1 +▁Produktivität 1 +▁Ana 1 +▁Ambiente 1 +▁stehenden 1 +sstrategie 1 +▁Brand 1 +▁nunmehr 1 +▁Anmerkung 1 +▁Nah 1 +▁einzusetzen 1 +opp 1 +gegriffen 1 +▁Hindernisse 1 +▁Peter 1 +wettbewerbsfähig 1 +▁bestimmter 1 +▁Überlegungen 1 +▁verboten 1 +▁müsste 1 +▁Russ 1 +▁buchen 1 +▁Block 1 +▁effizienter 1 +landschaft 1 +col 1 +▁hol 1 +▁österreichische 1 +▁Prag 1 +▁Spitzen 1 +▁bedroht 1 +▁Durchschnitt 1 +45 1 +▁strategischen 1 +▁Überleben 1 +▁medi 1 +▁Kriminalität 1 +▁erschien 1 +▁Balkon 1 +23 1 +č 1 +▁stammt 1 +▁lernt 1 +▁steigende 1 +gesehen 1 +view 1 +▁erwähnen 1 +▁Textil 1 +kapazität 1 +▁Ansprüche 1 +▁Erwägung 1 +▁radikal 1 +▁fördert 1 +▁Poker 1 
+stufe 1 +▁Arzneimittel 1 +▁Tonnen 1 +▁Nachhaltigkeit 1 +▁einzigartig 1 +▁fehlende 1 +binden 1 +▁retten 1 +▁Nehmen 1 +▁total 1 +▁Katastrophen 1 +pra 1 +quer 1 +lagerung 1 +▁nachdrücklich 1 +▁Reservierung 1 +▁Wunder 1 +shop 1 +▁eingehalten 1 +schreibung 1 +gearbeitet 1 +▁bemerkt 1 +▁solide 1 +▁großem 1 +▁Mexiko 1 +familie 1 +▁Mel 1 +▁aktivieren 1 +▁Sound 1 +▁Festival 1 +Ha 1 +ielle 1 +urlaub 1 +▁stammen 1 +▁Spitze 1 +▁Gottes 1 +▁aufgebaut 1 +▁Premierminister 1 +klick 1 +Mitglied 1 +IG 1 +▁geraten 1 +▁EADS 1 +▁Pack 1 +47 1 +▁Ball 1 +▁Ungleichheit 1 +▁verletzt 1 +▁Meister 1 +▁weist 1 +▁Mitgliedschaft 1 +▁Ablehnung 1 +▁intelligente 1 +▁Anwender 1 +▁Burg 1 +▁empfangen 1 +gehoben 1 +greift 1 +schlüsse 1 +rack 1 +▁Bestimmung 1 +▁müssten 1 +▁inklusive 1 +▁heißen 1 +▁Vari 1 +▁gekennzeichnet 1 +▁Wesentlichen 1 +▁erweitern 1 +qual 1 +67 1 +66 1 +▁Pferd 1 +22 1 +▁Feind 1 +▁Vorlage 1 +geschnitten 1 +п 1 +▁Miet 1 +▁anbelangt 1 +▁rechtzeitig 1 +▁still 1 +▁Stiftung 1 +▁Verpackung 1 +▁Dreh 1 +UNG 1 +▁anschließen 1 +▁Konto 1 +▁engagiert 1 +▁gewaltige 1 +▁Finger 1 +▁Schwer 1 +boot 1 +▁einbezogen 1 +▁Viel 1 +▁Budget 1 +▁Profil 1 +▁wonach 1 +nütz 1 +▁Vermögen 1 +▁heutzutage 1 +▁anfangen 1 +▁Ankunft 1 +▁Fahrt 1 +90 1 +▁beabsichtigt 1 +▁Ausschüsse 1 +▁Rückkehr 1 +▁Förder 1 +gefühl 1 +▁selber 1 +▁Schuh 1 +▁Station 1 +▁Leit 1 +Enterprise 1 +▁großzügig 1 +methode 1 +▁angelegt 1 +▁verringert 1 +▁WLAN 1 +dorf 1 +▁Investoren 1 +▁virtuelle 1 +▁Parameter 1 +▁Passwort 1 +▁Beide 1 +ographische 1 +▁Manche 1 +▁Bern 1 +▁baut 1 +▁Bay 1 +schütz 1 +schreck 1 +▁herzustellen 1 +▁Fax 1 +– 1 +wuchs 1 +▁aufgeführt 1 +▁passende 1 +agentur 1 +Daten 1 +motor 1 +▁Konsultation 1 +hersteller 1 +▁unzureichend 1 +▁fiel 1 +▁Trag 1 +▁Persönlichkeit 1 +▁vorgestellt 1 +ppel 1 +▁erfreut 1 +▁Entfernung 1 +▁Börse 1 +▁Luxemburg 1 +▁Verzögerung 1 +▁Diplom 1 +▁Gerade 1 +▁Million 1 +▁Alltag 1 +▁Wochenende 1 +brenn 1 +▁mache 1 +▁Group 1 +▁Ya 1 +▁gezielt 1 +AU 1 +weiß 1 +▁mündliche 1 +▁Strände 1 +beruf 1 +▁mitteilen 1 +beschreibung 1 +▁Sozialdemokrat 1 +▁angewendet 1 +▁abstimmen 1 +▁beenden 1 +▁Unterricht 1 +▁italienische 1 +gewicht 1 +▁Beschwerde 1 +▁behauptet 1 +▁Minute 1 +Bericht 1 +▁Allianz 1 +▁Arzt 1 +▁vertrauen 1 +messe 1 +▁anhand 1 +▁vorliegende 1 +kräftig 1 +▁Support 1 +▁Belgien 1 +▁richtet 1 +rechnung 1 +5% 1 +▁formuliert 1 +▁Wall 1 +▁Resultat 1 +ivilgesellschaft 1 +schuss 1 +AV 1 +▁Freitag 1 +▁Ingenieur 1 +▁gründlich 1 +gehört 1 +▁Anders 1 +▁davor 1 +artikel 1 +▁England 1 +othek 1 +▁derzeitige 1 +▁Außenminister 1 +▁vermutlich 1 +▁Müll 1 +▁Ausflüge 1 +▁überrascht 1 +▁Schmerz 1 +▁nennt 1 +▁geliefert 1 +▁befasst 1 +▁Schon 1 +▁eingeleitet 1 +▁empfohlen 1 +▁Dir 1 +planung 1 +LO 1 +▁Donnerstag 1 +▁Entdeckung 1 +▁antworten 1 +▁Fang 1 +21 1 +▁errichtet 1 +▁Hunger 1 +Über 1 +bedarf 1 +▁Impuls 1 +▁erstmals 1 +▁Plätze 1 +▁Geschenk 1 +▁Erstellung 1 +▁Schweizer 1 +▁Abschaffung 1 +▁Tarif 1 +zieh 1 +reit 1 +afrika 1 +▁Motiv 1 +2000 1 +▁legal 1 +Control 1 +▁Leiter 1 +▁Back 1 +▁zurückzuführen 1 +pflege 1 +▁Anschluss 1 +▁bewältigen 1 +▁jetzigen 1 +▁unterstreichen 1 +▁Schnell 1 +▁gefallen 1 +dokument 1 +▁Alkohol 1 +▁physisch 1 +dienstleistungen 1 +▁Schicksal 1 +▁Wärme 1 +▁vertraut 1 +bezogene 1 +▁Normal 1 +▁Sauna 1 +faktor 1 +▁Erzeuger 1 +▁vorzulegen 1 +▁überlassen 1 +Energieeffizienz 1 +Produkt 1 +▁abschließend 1 +glichkeit 1 +ova 1 +sstaatlichkeit 1 +▁Hardware 1 +▁japanische 1 +▁Vorausschau 1 +▁Acht 1 +▁ökonomische 1 +▁mittlere 1 +Com 1 +ergebnisse 1 +▁kombiniert 1 +▁Archiv 1 +▁Email 1 +▁Richter 1 +▁beruhen 1 +Seite 1 +sprozesses 1 +gäste 1 +getriebe 1 +▁Schaf 
1 +▁Richtig 1 +▁Barroso 1 +▁hervorgehoben 1 +ponent 1 +brach 1 +▁Präsentation 1 +▁geregelt 1 +DR 1 +▁Hohe 1 +▁dasselbe 1 +PE 1 +▁fliegen 1 +▁Erwerb 1 +▁eingeschränkt 1 +▁legitim 1 +▁Katalog 1 +▁behoben 1 +▁Syrien 1 +▁bisherigen 1 +vision 1 +▁Immer 1 +staatliche 1 +load 1 +▁Freizügigkeit 1 +▁separat 1 +▁Festplatte 1 +Service 1 +minute 1 +▁Länge 1 +sendung 1 +▁digital 1 +ici 1 +▁kalt 1 +dringlich 1 +deutsch 1 +ausgaben 1 +EU 1 +▁Lateinamerika 1 +ifizieren 1 +holz 1 +count 1 +▁verwende 1 +▁Lohn 1 +▁technisch 1 +verständnis 1 +▁draußen 1 +▁polnische 1 +▁senken 1 +▁verbracht 1 +▁definieren 1 +Mark 1 +▁anwenden 1 +▁Prüf 1 +41 1 +▁Know 1 +▁Phil 1 +▁Lobby 1 +▁Vereinfachung 1 +▁begleitet 1 +▁Serbien 1 +▁Reinigung 1 +▁Gegner 1 +▁ausgezeichnet 1 +▁Nu 1 +▁erfordern 1 +spiegel 1 +▁beeinflusst 1 +▁Hamburg 1 +krebs 1 +▁erhoben 1 +▁gelingt 1 +source 1 +▁Ideal 1 +automat 1 +▁hohem 1 +▁Vizepräsident 1 +▁Putin 1 +gestaltung 1 +▁Ratifizierung 1 +anzeige 1 +▁Forscher 1 +▁Konsum 1 +▁Vortrag 1 +▁gestärkt 1 +▁Transaktion 1 +72 1 +regierung 1 +▁Madrid 1 +▁erschaffen 1 +▁schätzen 1 +▁Verbrauch 1 +▁Beteiligten 1 +▁angebracht 1 +56 1 +▁hierbei 1 +▁Hof 1 +▁Extrem 1 +▁Kohäsion 1 +▁erteilt 1 +▁Mauer 1 +▁Zone 1 +▁einzuführen 1 +▁bemerkenswert 1 +▁Versand 1 +▁Umsatz 1 +Staaten 1 +steht 1 +▁Baum 1 +▁registriert 1 +▁gelesen 1 +▁Volkes 1 +bewußt 1 +▁Finnland 1 +schliess 1 +▁Oh 1 +▁vereint 1 +▁Miß 1 +2001 1 +behandlung 1 +SCH 1 +▁Show 1 +▁geräumig 1 +▁Mängel 1 +▁PHP 1 +gemäß 1 +SV 1 +▁solch 1 +Vertrag 1 +charakter 1 +▁festzulegen 1 +ländische 1 +▁Erhaltung 1 +▁Protest 1 +▁Geh 1 +▁gestattet 1 +erstattung 1 +▁wünsche 1 +rüstung 1 +▁bedanken 1 +▁Card 1 +EX 1 +FA 1 +katastrophe 1 +▁Palästinenser 1 +▁kennt 1 +▁festzustellen 1 +heilig 1 +light 1 +▁Apartment 1 +▁teilt 1 +vorschriften 1 +gelegen 1 +▁Il 1 +▁Herbst 1 +▁gebraucht 1 +▁angeblich 1 +▁Verfasser 1 +TER 1 +▁Santa 1 +▁Behinderung 1 +ifizierung 1 +▁Tom 1 +front 1 +▁Michael 1 +▁unentgeltlich 1 +▁beschränken 1 +▁Rauch 1 +▁speichern 1 +abilität 1 +▁Cast 1 +sieg 1 +card 1 +▁Bevor 1 +flex 1 +▁Einwanderer 1 +:// 1 +OP 1 +▁inmitten 1 +▁Lied 1 +78 1 +▁ermutigen 1 +▁Volksgesundheit 1 +züglich 1 +▁ISO 1 +küste 1 +▁Ausrüstung 1 +▁Feier 1 +Version 1 +▁nachzudenken 1 +▁Seminar 1 +▁David 1 +46 1 +▁Dynamik 1 +▁Beseitigung 1 +▁präsentieren 1 +förderung 1 +wässer 1 +ía 1 +▁dauern 1 +▁bewährt 1 +▁gewähren 1 +▁verstehe 1 +Mitgliedstaaten 1 +▁Arbeitgeber 1 +▁Mechanismen 1 +▁erinnere 1 +▁angekündigt 1 +termin 1 +▁Schulung 1 +▁Sobald 1 +schwäche 1 +umpf 1 +ifikation 1 +▁nördlich 1 +▁Formulierung 1 +▁nehme 1 +▁Nachteil 1 +▁Unterkünfte 1 +imp 1 +▁Filter 1 +Sie 1 +▁Befehl 1 +kühl 1 +▁jüngste 1 +у 1 +▁Modernisierung 1 +runde 1 +▁schaut 1 +▁Südafrika 1 +▁Panorama 1 +▁geschätzt 1 +▁Soldaten 1 +stitution 1 +▁elektrische 1 +▁Fertigung 1 +box 1 +▁gründet 1 +▁Belarus 1 +▁Wählen 1 +\ 1 +▁bezeichnen 1 +anspruch 1 +▁erarbeitet 1 +▁herunterladen 1 +▁Fahrrad 1 +boden 1 +▁Bekannt 1 +schlaf 1 +▁Maria 1 +▁stattfindet 1 +▁Vorgehensweise 1 +▁nu 1 +▁begründet 1 +ippen 1 +HA 1 +▁wohnen 1 +RS 1 +▁Media 1 +book 1 +schirm 1 +verlust 1 +35 1 +▁unterliegen 1 +▁Mond 1 +finanzierung 1 +▁Australien 1 +▁Zerstörung 1 +▁gelangt 1 +länge 1 +▁Pilot 1 +▁Willkommen 1 +▁Weltwirtschaft 1 +▁Unsicherheit 1 +▁hierfür 1 +▁koordiniert 1 +▁entfernen 1 +▁Nichtraucher 1 +lässt 1 +▁grundlegend 1 +▁Verzeichnis 1 +▁Konfiguration 1 +▁versteht 1 +haupt 1 +▁bereitet 1 +▁Voraus 1 +â 1 +initiative 1 +▁vergleichbar 1 +▁nord 1 +▁Dol 1 +Server 1 +▁Global 1 +▁Schnee 1 +geschwindigkeit 1 +▁Hügel 1 +2010 1 +strom 1 +▁aktualisiert 1 +▁begeistert 1 +▁aufzubauen 
1 +▁Opti 1 +▁Game 1 +Software 1 +gefügt 1 +▁Sonntag 1 +zulegen 1 +▁Resort 1 +▁Höchst 1 +▁Heizung 1 +▁Zauber 1 +ographie 1 +▁läßt 1 +operation 1 +▁Begleit 1 +▁Käufer 1 +Kohäsionspolitik 1 +▁Somit 1 +▁modified 1 +▁Profit 1 +▁übrig 1 +▁Ausbau 1 +▁Nation 1 +▁Vorrang 1 +▁unnötig 1 +▁Passagier 1 +▁anzuwenden 1 +51 1 +terra 1 +▁sichergestellt 1 +▁enthalt 1 +zuführen 1 +83 1 +schön 1 +übergreifende 1 +▁Entwickler 1 +▁öffnet 1 +▁steigern 1 +▁anzunehmen 1 +71 1 +flüssig 1 +▁Fremdenverkehr 1 +82 1 +▁Flor 1 +▁bewerten 1 +76 1 +▁Freihandel 1 +motiv 1 +79 1 +▁Straßburg 1 +▁Rechner 1 +74 1 +▁Zeile 1 +▁laufenden 1 +▁Währungsunion 1 +▁Gewerkschaft 1 +verarbeitung 1 +spricht 1 +stärke 1 +Wirtschaftskrise 1 +▁Emissions 1 +500 1 +back 1 +▁linken 1 +▁Berechnung 1 +▁Demokraten 1 +▁Karriere 1 +▁Abendessen 1 +schrieb 1 +▁Metro 1 +Strahl 1 +wachstum 1 +▁Konzentration 1 +II 1 +technische 1 +▁gesammelt 1 +▁Sat 1 +atik 1 +▁Poly 1 +trans 1 +87 1 +▁wechseln 1 +59 1 +▁Reserve 1 +garten 1 +phrase 1 +NO 1 +kamera 1 +essel 1 +▁raus 1 +▁Händler 1 +« 1 +▁passt 1 +Ex 1 +restaurant 1 +▁anspruchsvolle 1 +▁personenbezogen 1 +▁örtliche 1 +▁Betreiber 1 +wel 1 +fordert 1 +ständigkeit 1 +▁Alpen 1 +liberal 1 +73 1 +▁Washington 1 +▁veröffentlichen 1 +▁Senkung 1 +fenster 1 +▁Kuba 1 +▁vermittelt 1 +▁USB 1 +▁hingegen 1 +▁Koch 1 +▁http 1 +▁diplomatische 1 +▁Möbel 1 +▁250 1 +▁Gefangene 1 +▁russische 1 +▁Tau 1 +▁mehrfach 1 +▁Dokumentation 1 +▁Professor 1 +▁Beachtung 1 +▁mangelnde 1 +▁Meilen 1 +kunst 1 +zutreten 1 +litz 1 +▁Petition 1 +▁Gibt 1 +wünsch 1 +Auf 1 +▁have 1 +▁Datum 1 +▁herauszufinden 1 +▁Truppen 1 +vertreter 1 +54 1 +▁vereinbar 1 +▁Einfach 1 +77 1 +▁Todesstrafe 1 +▁ergänzt 1 +▁Strasse 1 +▁Fakten 1 +▁Stoff 1 +tauchen 1 +▁gehabt 1 +▁IWF 1 +▁Beurteilung 1 +▁Bestell 1 +▁Andererseits 1 +▁Human 1 +▁Ausarbeitung 1 +nehmbar 1 +Stunden 1 +Finde 1 +histori 1 +▁HIV 1 +▁befürchte 1 +▁passen 1 +▁1994 1 +bewusst 1 +Bomb 1 +▁finanzieren 1 +▁irgendwelche 1 +▁mittlerweile 1 +gebrochen 1 +▁Rettung 1 +▁armen 1 +▁schien 1 +▁Äußerung 1 +kredit 1 +serie 1 +▁beweisen 1 +ú 1 +▁Souveränität 1 +▁Schlusselwort 1 +rechtliche 1 +▁Säule 1 +▁geheim 1 +führ 1 +▁Lesen 1 +software 1 +▁gleichermaßen 1 +▁Tanz 1 +▁erheben 1 +▁Ereignis 1 +58 1 +▁beseitigen 1 +▁Aufsicht 1 +▁Betrug 1 +▁freut 1 +▁Auseinandersetz 1 +▁Übernahme 1 +▁schädlich 1 +▁teuer 1 +▁geschafft 1 +▁Street 1 +▁lebendig 1 +▁Entspannung 1 +▁Newsletter 1 +anforderungen 1 +▁Steigerung 1 +▁Saison 1 +▁strikt 1 +dämm 1 +▁Java 1 +zip 1 +höhe 1 +▁Verwirklichung 1 +zweig 1 +▁Revision 1 +heirat 1 +▁portugiesische 1 +▁Etwa 1 +▁bevorzugt 1 +▁schrittweise 1 +▁berechnet 1 +▁Modern 1 +▁erwiesen 1 +▁Nachmittag 1 +▁Bäume 1 +▁Nov 1 +Spiel 1 +▁gemein 1 +schränke 1 +▁Leidenschaft 1 +▁Null 1 +▁Ausgleich 1 +▁fürchte 1 +strecken 1 +Work 1 +▁objektiv 1 +gefertigt 1 +▁diverse 1 +logie 1 +▁Auflösung 1 +▁grammatisch 1 +inisterpräsident 1 +▁Gehminuten 1 +▁Referenz 1 +▁gewidmet 1 +▁Manchmal 1 +▁überwachen 1 +▁realer 1 +▁Empfang 1 +▁Parkplatz 1 +▁basierend 1 +▁erkennt 1 +max 1 +zeile 1 +▁Kreativität 1 +▁angefangen 1 +▁versteckt 1 +down 1 +▁Finanzielle 1 +raff 1 +▁demokratisch 1 +vil 1 +▁denjenigen 1 +▁kontaktieren 1 +gebäude 1 +zauber 1 +▁Jean 1 +▁Referendum 1 +▁erworben 1 +▁ausüben 1 +▁Anmeldung 1 +▁extra 1 +kannt 1 +▁Klang 1 +▁soeben 1 +▁Allgemein 1 +▁Überblick 1 +▁Genf 1 +▁Spektrum 1 +ING 1 +▁Spiegel 1 +▁Osteuropa 1 +▁plus 1 +▁Minderheit 1 +▁Parkplätze 1 +Rechtsvorschrift 1 +▁herstellen 1 +verhältnisse 1 +Schnittstelle 1 +gast 1 +• 1 +▁einstimmig 1 +▁luxuriöse 1 +vermögen 1 +▁Okay 1 +▁Kennzeichnung 1 +▁Umweltfragen 1 +▁indische 1 
+▁verursachen 1 +▁Paar 1 +Prozess 1 +▁festlegen 1 +я 1 +▁faszinierend 1 +▁seltsam 1 +▁Luftverkehr 1 +▁vous 1 +gesundheit 1 +▁getötet 1 +▁fassen 1 +▁Wartung 1 +▁vergleichen 1 +▁Robert 1 +▁bedauerlich 1 +▁langjährige 1 +▁Zeitplan 1 +▁künstlich 1 +städte 1 +▁jedenfalls 1 +fotograf 1 +▁beinhalten 1 +komplex 1 +▁aktiviert 1 +▁Lieblings 1 +▁Pub 1 +▁Logik 1 +▁Errichtung 1 +versammlung 1 +präg 1 +▁nachhaltig 1 +Gesundheitswesen 1 +▁Wahrnehmung 1 +▁Wirksamkeit 1 +▁investiert 1 +▁Massage 1 +„ 1 +platte 1 +69 1 +▁Ausrichtung 1 +▁Oliven 1 +▁Bewohner 1 +▁niederländische 1 +▁ungarische 1 +▁starb 1 +▁Schlussel 1 +▁Zivilisation 1 +▁Philosophie 1 +▁Chef 1 +▁Ecke 1 +▁600 1 +▁besagt 1 +▁Konstruktion 1 +▁Szene 1 +▁leistet 1 +▁berichten 1 +▁Arbeitskräfte 1 +▁Qui 1 +▁sparen 1 +▁abgegeben 1 +▁Betreuung 1 +▁Tendenz 1 +güter 1 +muster 1 +▁übermittelt 1 +▁älteste 1 +▁Schwäche 1 +▁Betriebssystem 1 +▁beobachtet 1 +▁beantwortet 1 +▁Lücke 1 +▁Acc 1 +▁beschreiben 1 +▁Ratschlag 1 +▁Ergänzung 1 +▁islamische 1 +▁Geheimnis 1 +▁Steuerzahler 1 +▁Erkenntnisse 1 +▁vorsichtig 1 +▁Versprechen 1 +Sicherheitsrat 1 +▁körperlich 1 +▁Umstände 1 +Entschlossenheit 1 +▁beinahe 1 +▁Mitgefühl 1 +taucht 1 +▁geltend 1 +wichtig 1 +▁endet 1 +nbetracht 1 +▁Flash 1 +▁Mitentscheidung 1 +84 1 +▁entstand 1 +Gruppe 1 +händler 1 +▁Abteilung 1 +▁Rock 1 +▁Jack 1 +Und 1 +▁verschiedenste 1 +▁eignet 1 +▁einzelstaatlich 1 +▁Island 1 +▁nein 1 +LAN 1 +▁DIE 1 +▁bedeutsam 1 +analyse 1 +▁müßte 1 +▁dunkle 1 +▁oftmals 1 +stellbar 1 +▁Mechanismus 1 +▁Substanz 1 +▁Psycho 1 +TEN 1 +▁wünscht 1 +fertigung 1 +musik 1 +▁Niemand 1 +▁startet 1 +▁bewahren 1 +▁fuer 1 +▁Benutzung 1 +▁Flüchtlings 1 +▁Tschechische 1 +▁anzubieten 1 +▁geblieben 1 +▁Bilanz 1 +▁Johann 1 +fekt 1 +opfer 1 +▁Internetseite 1 +▁Ausweitung 1 +werbung 1 +▁Konvention 1 +▁dargelegt 1 +53 1 +▁super 1 +▁begegnen 1 +bücher 1 +▁Luftfahrt 1 +▁PPE 1 +GO 1 +▁Rückgang 1 +▁Saal 1 +▁derselben 1 +▁Mehrwertsteuer 1 +▁steckt 1 +EIN 1 +verteilung 1 +▁Bezeichnung 1 +▁German 1 +▁Logo 1 +▁vorzunehmen 1 +▁Bestätigung 1 +▁Tragödie 1 +▁ungewöhnlich 1 +▁use 1 +▁Durchsetzung 1 +▁Architekt 1 +▁Visa 1 +ION 1 +▁House 1 +konsum 1 +mitte 1 +▁oberste 1 +▁Zulassung 1 +▁gesichert 1 +▁Tasche 1 +währung 1 +ffel 1 +Up 1 +▁Aktivität 1 +▁Dänemark 1 +▁Transfer 1 +nack 1 +▁mutig 1 +63 1 +▁GAP 1 +▁Anhörung 1 +Benutzer 1 +▁spanische 1 +▁anerkennen 1 +erzeugnisse 1 +▁steigt 1 +bevölkerung 1 +▁PDF 1 +nutzung 1 +▁könne 1 +institutionelle 1 +▁Präsenz 1 +▁Futter 1 +▁Frühjahr 1 +▁Hör 1 +î 1 +▁planen 1 +schärfe 1 +▁Log 1 +▁George 1 +▁Route 1 +▁Vogel 1 +▁irgendwo 1 +▁Terroristen 1 +verkauf 1 +transfer 1 +leiste 1 +▁Beschluß 1 +▁zeitlich 1 +▁Flughäfen 1 +▁Interview 1 +pend 1 +28 1 +▁Qual 1 +▁Aufzug 1 +▁sensible 1 +▁Besondere 1 +▁künstlerische 1 +29 1 +▁Nutz 1 +terrasse 1 +bekämpfung 1 +▁schließt 1 +▁Volkspartei 1 +Umstrukturierung 1 +▁lenken 1 +▁süd 1 +▁stattgefunden 1 +▁Konkurrenz 1 +ressourcen 1 +suite 1 +▁Gewässer 1 +▁exakt 1 +▁erfasst 1 +▁Tempo 1 +▁fortgesetzt 1 +▁anschauen 1 +57 1 +▁Zwar 1 +▁Listings 1 +▁Genießen 1 +81 1 +▁Fett 1 +▁pour 1 +▁logisch 1 +▁Französisch 1 +dimension 1 +▁Café 1 +▁Segel 1 +▁Eröffnung 1 +▁entdeckst 1 +▁Krankenhaus 1 +▁hinzufugen 1 +▁Dörfer 1 +▁multilaterale 1 +ê 1 +▁städtische 1 +▁Vorbehalt 1 +Technologie 1 +▁spannend 1 +wettbewerb 1 +▁Ablauf 1 +▁einmalige 1 +▁Bürokratie 1 +▁Jedoch 1 +▁Danach 1 +hundert 1 +▁Differenz 1 +▁Fassung 1 +signal 1 +▁verrückt 1 +▁Ufer 1 +▁verpflichten 1 +▁anhaltende 1 +käufe 1 +..."... 
1 +▁Empfänger 1 +▁Rezession 1 +▁ablehnen 1 +▁Schwester 1 +▁800 1 +masse 1 +▁teure 1 +▁stattdessen 1 +▁hinzugefügt 1 +▁Ostsee 1 +ject 1 +▁Christen 1 +▁geleitet 1 +▁realisiert 1 +▁2012 1 +▁traurig 1 +matische 1 +Source 1 +▁Kunststoff 1 +einhalb 1 +▁Prodi 1 +Richtlinie 1 +Agent 1 +mauer 1 +▁abgesehen 1 +▁gestartet 1 +▁JavaScript 1 +▁User 1 +▁umfassen 1 +Bereich 1 +▁Beobachter 1 +▁aussieht 1 +read 1 +▁anpassen 1 +ough 1 +▁Investition 1 +▁Zunahme 1 +▁Quoten 1 +kampagne 1 +verschmutzung 1 +▁ECU 1 +▁gestatten 1 +▁Missbrauch 1 +flieg 1 +anwendung 1 +▁Konjunktur 1 +▁Zuverlässigkeit 1 +▁Dringlichkeit 1 +▁Vordergrund 1 +▁Störung 1 +www 1 +▁views 1 +▁DNA 1 +▁Arbeitszeit 1 +▁Skype 1 +▁stärksten 1 +▁belgische 1 +▁Erfindung 1 +▁Mischung 1 +▁geschah 1 +▁Salz 1 +▁Beschäftigte 1 +▁interessieren 1 +▁Erreichung 1 +▁Klarheit 1 +▁erbaut 1 +ific 1 +▁Samstag 1 +KOM 1 +äuschen 1 +▁irgend 1 +▁wild 1 +▁iranische 1 +▁Key 1 +▁keineswegs 1 +▁drastisch 1 +▁gäbe 1 +▁verarbeitet 1 +zwecke 1 +▁Feststellung 1 +▁Hunderte 1 +▁Urheberrecht 1 +Schlussfolgerung 1 +▁Brau 1 +telefon 1 +revolution 1 +▁widmen 1 +▁zulässig 1 +▁Wichtigkeit 1 +▁schlicht 1 +▁Kenntnisse 1 +▁Vertretung 1 +▁genehmigt 1 +▁Verfolgung 1 +▁Unfall 1 +spalt 1 +ologen 1 +▁kompetent 1 +▁Nizza 1 +▁1980 1 +▁privat 1 +▁verabschieden 1 +▁beibehalten 1 +▁Figur 1 +▁unterbreitet 1 +▁Privatsphäre 1 +basis 1 +Dollar 1 +samkeit 1 +▁Stich 1 +▁Song 1 +▁erleichtert 1 +oberfläche 1 +▁Variante 1 +ministerium 1 +▁Gemüse 1 +▁kollektive 1 +Musik 1 +▁zustande 1 +▁Brennstoff 1 +▁bestmöglich 1 +▁Einnahmen 1 +▁Zahn 1 +▁elegant 1 +▁Schicht 1 +wirtschaftliche 1 +▁Erkrankung 1 +▁dreht 1 +▁Museen 1 +▁testen 1 +▁tragisch 1 +▁Besonderheit 1 +▁Kyoto 1 +▁nannte 1 +▁strukturelle 1 +▁Behörde 1 +besitz 1 +normen 1 +unterschied 1 +▁vereinfacht 1 +▁Offenheit 1 +▁unerlässlich 1 +avec 1 +▁Rahmenprogramm 1 +▁Welthandels 1 +▁Bruder 1 +▁Beobachtung 1 +92 1 +▁parallel 1 +▁Gaza 1 +▁Einschätzung 1 +▁Life 1 +gesteuert 1 +▁NICHT 1 +▁benachteiligt 1 +▁verankert 1 +▁Visum 1 +▁Schlussfolger 1 +▁integrieren 1 +▁Qualifikation 1 +▁bewertet 1 +▁Bereits 1 +▁Fundament 1 +▁statistisch 1 +▁Stress 1 +▁Übersicht 1 +santräge 1 +▁Evolution 1 +stöße 1 +▁sozusagen 1 +risiko 1 +▁Lieferanten 1 +▁Masse 1 +▁Palette 1 +modul 1 +▁Aktualisierung 1 +▁Begründung 1 +▁respektieren 1 +♪ 1 +▁Direktor 1 +frau 1 +▁Zurück 1 +disziplin 1 +mechanismus 1 +▁Prozeß 1 +▁Gentoo 1 +▁Album 1 +▁Fabrik 1 +Jahr 1 +lux 1 +mechanismen 1 +▁Mathematik 1 +▁gerechtfertigt 1 +▁stilvoll 1 +▁Lärm 1 +reiz 1 +glas 1 +bleiben 1 +▁exzellente 1 +bilanz 1 +oire 1 +▁Stattdessen 1 +▁verurteilen 1 +▁Fertig 1 +▁territoriale 1 +▁überraschend 1 +▁Airbus 1 +▁beschleunigen 1 +▁entworfen 1 +▁längst 1 +▁Nachdruck 1 +▁Kürze 1 +▁Slowakei 1 +▁Saint 1 +cord 1 +▁Gegenstände 1 +Video 1 +▁Blumen 1 +▁Siedlung 1 +therapie 1 +▁Flotte 1 +▁Mittelmeerraum 1 +▁drücken 1 +▁aufregend 1 +▁Aufhebung 1 +▁Bosnien 1 +▁Tennis 1 +schmerz 1 +▁Hochzeit 1 +▁Arbeitsgruppe 1 +▁Etikett 1 +▁Hostelsclub 1 +▁Kaiser 1 +▁bedienen 1 +▁Fotografie 1 +metall 1 +▁Ausbeutung 1 +▁Wiederherstell 1 +2009 1 +Frühstücksbuffet 1 +▁erstklassige 1 +▁anzupassen 1 +▁modernste 1 +▁Alternativ 1 +▁auswählen 1 +▁Kürzung 1 +klima 1 +board 1 +kriterien 1 +▁Innenstadt 1 +▁Finanzmärkte 1 +umweltfreundlich 1 +▁Kernel 1 +▁Demonstration 1 +▁eröffnen 1 +▁gegebenenfalls 1 +▁1993 1 +▁Laser 1 +▁Rassismus 1 +▁ärmsten 1 +▁emotional 1 +▁studiert 1 +saison 1 +formular 1 +Sowohl 1 +▁Beförderung 1 +▁unbekannt 1 +gestalt 1 +Abkommen 1 +▁Liberalen 1 +▁ignoriert 1 +möglichkeit 1 +▁beschreibt 1 +▁Kreatur 1 +hancengleichheit 1 +▁Galerie 1 +▁Fuss 1 
+▁Treibhausgas 1 +▁umgekehrt 1 +▁verschaffen 1 +▁Wolf 1 +▁1992 1 +ibili 1 +▁Entdecke 1 +▁Mobiltelefon 1 +▁respektiert 1 +▁zwölf 1 +▁America 1 +▁Hans 1 +ador 1 +stunden 1 +irurg 1 +▁Brenn 1 +ž 1 +plikation 1 +▁Gegenwart 1 +93 1 +▁Füße 1 +dichte 1 +▁legislative 1 +▁Häfen 1 +▁Schnitt 1 +museum 1 +schätzung 1 +nachfolgend 1 +▁Halt 1 +▁inspiriert 1 +▁Erzeugung 1 +▁Reparatur 1 +▁Fortsetzung 1 +▁erörtert 1 +▁nukleare 1 +▁Prävention 1 +▁Florenz 1 +▁Mehrwert 1 +ь 1 +▁Innerhalb 1 +▁anzuzeigen 1 +▁staff 1 +▁Check 1 +▁Ferr 1 +▁Schwelle 1 +▁Applikation 1 +▁unzählige 1 +▁Sprech 1 +▁fortsetzen 1 +vereinbarung 1 +▁Verkehrsmittel 1 +stift 1 +▁Marokko 1 +▁Anwesenheit 1 +▁Fokus 1 +▁Anregung 1 +▁Komplexität 1 +▁Verhältnisse 1 +leuchten 1 +reihe 1 +papier 1 +▁Lokal 1 +▁100% 1 +▁Grünbuch 1 +▁Elite 1 +▁vergangen 1 +▁Pfad 1 +ô 1 +▁begleiten 1 +▁Wechselkurs 1 +▁Bonus 1 +▁Berater 1 +versuch 1 +wid 1 +▁Thomas 1 +▁Reichtum 1 +▁begangen 1 +aufgaben 1 +▁Physik 1 +▁zugute 1 +5.000 1 +Lösung 1 +Гј 1 +▁kommunizieren 1 +▁verwandelt 1 +▁Problematik 1 +studie 1 +▁NRO 1 +Regierung 1 +Gipfel 1 +▁tschechische 1 +▁Vereinigung 1 +▁Folgendes 1 +▁Angestellte 1 +▁wofür 1 +▁Dublin 1 +▁Abfälle 1 +▁Solche 1 +größte 1 +з 1 +▁Anhänger 1 +▁Ausgrenzung 1 +▁herausragende 1 +▁Erarbeitung 1 +Paket 1 +▁Weiterbildung 1 +regulierung 1 +profil 1 +▁Abenteuer 1 +▁Konvergenz 1 +▁flexibel 1 +▁vorsieht 1 +▁Venedig 1 +▁unterbrochen 1 +▁Echtzeit 1 +▁Behauptung 1 +hai 1 +▁verantwortungs 1 +eiße 1 +▁spüren 1 +▁problemlos 1 +▁damalige 1 +▁2013 1 +2003 1 +▁Inkrafttreten 1 +▁Magazin 1 +▁minimal 1 +▁Statut 1 +▁bekräftigt 1 +▁gekauft 1 +2008 1 +▁Nigeria 1 +gipfel 1 +▁bearbeitet 1 +entscheidung 1 +▁Therapie 1 +▁Verabschiedung 1 +▁erwerben 1 +▁Black 1 +▁Erscheinung 1 +gezeichnet 1 +geschaltet 1 +▁Insgesamt 1 +▁unterscheidet 1 +▁weibliche 1 +berichterstatter 1 +kämpfe 1 +▁Orientierung 1 +▁Gipfeltreffen 1 +Expert 1 +▁eingeräumt 1 +▁natur 1 +▁Wüste 1 +flüsse 1 +▁Virus 1 +klagt 1 +▁basieren 1 +▁etabliert 1 +▁maßgeblich 1 +emissionen 1 +messung 1 +▁Zusage 1 +▁stecken 1 +sprachige 1 +▁III 1 +nähe 1 +▁gewinnt 1 +▁fließen 1 +▁erlangen 1 +▁Korrektur 1 +▁bürgerliche 1 +▁Gewähr 1 +sequenz 1 +▁Mütter 1 +▁Geltung 1 +▁verwandeln 1 +▁ethische 1 +blatt 1 +▁Extra 1 +groß 1 +▁sofortige 1 +ergebnis 1 +94 1 +Annehmlichkeiten 1 +▁Hauptbahnhof 1 +▁kritisiert 1 +▁Talent 1 +▁Eigenschaft 1 +89 1 +▁Gedanke 1 +▁jünger 1 +▁permanent 1 +veranstaltung 1 +tempo 1 +Team 1 +Modus 1 +▁glaubwürdig 1 +▁verwirklichen 1 +▁Griff 1 +▁Ähnlich 1 +Politik 1 +▁Disziplin 1 +stürzt 1 +▁vermieden 1 +▁Höhle 1 +▁Zusammensetzung 1 +▁arbeits 1 +▁finanziell 1 +reinigung 1 +С 1 +▁Rubrik 1 +▁rechtfertigen 1 +▁vermute 1 +▁Indikator 1 +▁schwerwiegende 1 +▁reichhaltig 1 +▁Einzelheiten 1 +ichtraucherzonen 1 +1⁄4 1 +▁Scheitern 1 +▁Zuerst 1 +▁liberale 1 +▁Michel 1 +▁Verlängerung 1 +▁atemberaubend 1 +▁verbreiten 1 +▁produktiv 1 +▁Wohnzimmer 1 +Ebene 1 +teilnehmer 1 +Point 1 +▁gefährden 1 +▁Hektar 1 +missbrauch 1 +▁Lebensqualität 1 +▁füllen 1 +å 1 +▁Entschädigung 1 +▁Wahrung 1 +▁gepflegt 1 +gespräch 1 +▁kenne 1 +▁Ministerrat 1 +▁University 1 +▁reibungslos 1 +▁Batterie 1 +▁Knochen 1 +▁überwiegend 1 +lapp 1 +mütig 1 +▁Eigentümer 1 +▁ordentlich 1 +erzeugung 1 +▁Studium 1 +Generalsekretär 1 +▁Mittwoch 1 +▁Spaziergang 1 +▁Rußland 1 +▁Depression 1 +▁Weiterhin 1 +▁ignorieren 1 +▁zugestimmt 1 +▁Bananen 1 +anbieter 1 +▁Möchte 1 +▁Rasse 1 +▁Kraftstoff 1 +▁Performance 1 +▁1991 1 +▁beizutragen 1 +▁Energiequellen 1 +räder 1 +▁klug 1 +▁Unterdrückung 1 +▁gravierend 1 +▁zerstören 1 +▁erstreckt 1 +▁romantische 1 +periode 1 +Bürger 1 
+▁malerische 1 +objekt 1 +́ 1 +▁Bewältigung 1 +▁gebilligt 1 +▁verliehen 1 +▁europaweit 1 +▁universell 1 +▁zeige 1 +störung 1 +▁Rezept 1 +▁Literatur 1 +▁Zürich 1 +▁appelliere 1 +▁fundamental 1 +▁zurückkehren 1 +▁gelingen 1 +▁angewiesen 1 +schuh 1 +` 1 +zuziehen 1 +effizient 1 +▁Zufriedenheit 1 +▁bemerken 1 +reinigt 1 +▁herrschen 1 +ão 1 +[ 1 +2002 1 +▁Libanon 1 +▁allmählich 1 +▁verknüpft 1 +▁Kleidung 1 +▁Mittler 1 +▁teilzunehmen 1 +▁markiert 1 +▁Geräusch 1 +▁Airport 1 +house 1 +▁zwanzig 1 +temperatur 1 +brecher 1 +▁Heimatland 1 +▁Mitgliedsländer 1 +▁Kämpfe 1 +▁Logistik 1 +▁lädt 1 +▁Gesichtspunkt 1 +benutzer 1 +▁Tibet 1 +▁spiegelt 1 +▁1989 1 +▁good 1 +Funktion 1 +▁Gedächtnis 1 +▁verwaltet 1 +▁verschwinden 1 +Dienst 1 +▁Entstehung 1 +▁beschleunigt 1 +▁Ordner 1 +▁Dampf 1 +▁unterwegs 1 +▁Gewebe 1 +schmutz 1 +▁inhaltlich 1 +▁Ermittlung 1 +▁löschen 1 +▁agieren 1 +▁Gelände 1 +Format 1 +plattform 1 +▁auswärtige 1 +▁obligatorisch 1 +▁terroristische 1 +klausel 1 +▁überarbeitet 1 +▁zielt 1 +▁Angehörige 1 +▁vorliegt 1 +▁Doppelzimmer 1 +¶ 1 +▁Stabilisierung 1 +medizin 1 +▁Spanisch 1 +▁Koordination 1 +stuhl 1 +▁dänische 1 +feuer 1 +steigerung 1 +verbindlich 1 +▁Legislativ 1 +▁Bewerber 1 +▁touristische 1 +▁Zusammenbruch 1 +sammlung 1 +▁verzichten 1 +▁GNU 1 +▁asiatische 1 +motion 1 +‘ 1 +▁Spezialitäten 1 +empfindlich 1 +▁12.00 1 +Demokratisierung 1 +▁Erdbeben 1 +▁Vergnügen 1 +▁schreibt 1 +▁löst 1 +▁Inhaber 1 +sphäre 1 +▁Act 1 +▁Übung 1 +▁Moskau 1 +▁Rechenschaft 1 +▁heftig 1 +▁berührt 1 +demokratische 1 +▁stützen 1 +▁Mühe 1 +▁Genuss 1 +▁Köln 1 +▁1,5 1 +▁Funktionalität 1 +▁Territorium 1 +▁angestrebt 1 +▁vereinfachen 1 +▁Domain 1 +▁Taxi 1 +▁benannt 1 +▁konzipiert 1 +▁Zuschauer 1 +▁scheinbar 1 +filter 1 +▁Ernst 1 +ł 1 +▁Maastricht 1 +▁Palästina 1 +status 1 +smethoden 1 +▁aufgerufen 1 +▁Straßenverkehr 1 +▁Diagnose 1 +▁Monopol 1 +▁location 1 +▁Chemie 1 +▁Royal 1 +▁Maßstab 1 +▁MySQL 1 +▁theoretisch 1 +▁entspannt 1 +▁Maxim 1 +Fotograf 1 +Institut 1 +▁brutal 1 +▁Vorredner 1 +▁winzig 1 +▁Höhepunkt 1 +▁analysieren 1 +▁kohärent 1 +▁Salzburg 1 +sozial 1 +kompetenz 1 +▁Desktop 1 +▁Getreide 1 +▁leitet 1 +▁administrative 1 +▁spektakulär 1 +▁vorübergehend 1 +▁folglich 1 +▁Register 1 +▁Medikament 1 +förmig 1 +▁erläutern 1 +▁genießt 1 +beginn 1 +szusammenarbeit 1 +▁nutzbar 1 +▁psychisch 1 +▁Universal 1 +▁Nerven 1 +▁Plastik 1 +präsidenten 1 +arquis 1 +▁trug 1 +▁Produzenten 1 +▁Ausübung 1 +▁Folter 1 +2006 1 +ы 1 +▁Betroffenen 1 +▁Zustellbett 1 +▁geholfen 1 +▁beiträgt 1 +▁Brüder 1 +▁Schätzung 1 +▁Drittstaaten 1 +defizit 1 +▁zahlt 1 +▁unverzichtbar 1 +▁Workshop 1 +▁herkömmliche 1 +▁Gross 1 +▁herausfinden 1 +▁vorbereiten 1 +▁parti 1 +▁Präsidium 1 +▁ausgedehnt 1 +▁erörtern 1 +▁Bedienung 1 +▁gehandelt 1 +▁verschieden 1 +▁Fülle 1 +▁grob 1 +▁kauft 1 +▁erläutert 1 +▁Devisen 1 +▁unterbreiten 1 +▁gewann 1 +▁Adria 1 +▁Ökosystem 1 +▁erachte 1 +▁Bakterien 1 +▁visuelle 1 +▁vorbehalten 1 +experiment 1 +▁Einladung 1 +▁empfängt 1 +▁befand 1 +▁beurteilen 1 +▁Riesen 1 +▁iPhone 1 +▁resultieren 1 +▁Verkäufer 1 +▁getestet 1 +▁denselben 1 +▁Mögliche 1 +▁schließe 1 +▁traditionell 1 +▁Gestalt 1 +▁Interpretation 1 +▁ratifiziert 1 +firmen 1 +▁scharf 1 +nbsp 1 +▁zitiere 1 +wolle 1 +gültig 1 +▁Elektrizität 1 +▁Atlantik 1 +▁droht 1 +▁Kalender 1 +betrug 1 +▁Lounge 1 +▁empfinde 1 +▁Chemikalien 1 +▁enttäuscht 1 +▁transatlantisch 1 +▁Anfänge 1 +▁verliert 1 +▁schützt 1 +▁befreien 1 +▁bezogen 1 +▁sportlich 1 +‚ 1 +г 1 +▁Ankündigung 1 +▁irgendwann 1 +▁mittelalterlich 1 +▁verhandeln 1 +▁aufrichtig 1 +▁Flasche 1 +▁Jugoslawien 1 +▁Taiwan 1 +▁Trennung 1 +▁zutiefst 1 
+▁Centre 1 +▁Milliarde 1 +▁Außer 1 +▁Galaxie 1 +▁Rotary 1 +▁bedauere 1 +▁Wertpapier 1 +Artikel 1 +▁bürokratische 1 +▁Konsumenten 1 +ст 1 +senkung 1 +Administration 1 +▁Intelligenz 1 +▁beeinträchtigt 1 +▁Infektion 1 +ausstattung 1 +▁nenne 1 +▁Argentinien 1 +▁Subventionen 1 +▁Spuren 1 +▁Überraschung 1 +▁regeln 1 +▁Züge 1 +▁Pharma 1 +schöpfung 1 +▁Geburtstag 1 +▁Elektronik 1 +▁schenken 1 +▁gründen 1 +kirche 1 +besuch 1 +▁Motto 1 +▁stetig 1 +▁Vorgaben 1 +▁Diktatur 1 +▁Verstärkung 1 +▁inakzeptabel 1 +▁stoppen 1 +▁School 1 +▁insofern 1 +höfe 1 +▁verheerend 1 +▁Vögel 1 +▁sanft 1 +Design 1 +lücke 1 +▁weiss 1 +▁Rahmenbedingung 1 +  1 +▁Potential 1 +▁fügt 1 +▁Nordkorea 1 +▁Spezies 1 +▁ungeachtet 1 +▁Quadrat 1 +▁Rhein 1 +▁Sechs 1 +▁Navigation 1 +▁definitiv 1 +▁musikalische 1 +▁absurd 1 +▁Weißbuch 1 +▁entschied 1 +▁Blue 1 +▁Publikation 1 +▁erkennbar 1 +▁kostengünstig 1 +▁kommunistische 1 +▁trennen 1 +▁Libyen 1 +▁Sowjetunion 1 +▁bedauern 1 +club 1 +lateral 1 +▁jahrelang 1 +▁worauf 1 +▁sinken 1 +Temp 1 +▁Weihnachts 1 +▁Wohlbefinden 1 +▁römische 1 +▁Anweisungen 1 +flotte 1 +fleisch 1 +kreuz 1 +ansprüche 1 +▁irakische 1 +▁Charles 1 +▁einheimische 1 +video 1 +spruch 1 +▁Foundation 1 +▁Investment 1 +▁kompakt 1 +▁Meldung 1 +▁offenkundig 1 +▁interaktive 1 +▁geniessen 1 +▁bevorstehenden 1 +▁Mineral 1 +Fischereipolitik 1 +▁Alexander 1 +▁Ungleichgewicht 1 +▁schlug 1 +▁Besatzung 1 +▁Dutzend 1 +▁melden 1 +▁Warnung 1 +і 1 +▁Frequenz 1 +▁Kompromiß 1 +▁Norwegen 1 +▁Früchte 1 +▁wünschenswert 1 +▁Rindfleisch 1 +▁multinationale 1 +▁Monitor 1 +▁vorteilhaft 1 +▁Index 1 +Modell 1 +potenzial 1 +▁entscheidet 1 +▁horizontal 1 +▁Toilette 1 +sammenzuarbeiten 1 +lizenz 1 +▁informelle 1 +▁zukünftig 1 +ökonom 1 +▁verlängert 1 +▁Gärten 1 +summe 1 +▁Bedingung 1 +▁analysiert 1 +▁Vietnam 1 +leuchtet 1 +brücke 1 +town 1 +▁Nuklear 1 +▁Litauen 1 +▁fossile 1 +▁eingebaut 1 +▁problematisch 1 +▁klingt 1 +härte 1 +Plug 1 +protokoll 1 +▁Aluminium 1 +▁Mazedonien 1 +▁Slowenien 1 +▁Richard 1 +▁Ultra 1 +▁isoliert 1 +Internet 1 +▁Stabilitätspakt 1 +▁Vermarktung 1 +übertragung 1 +feindliche 1 +▁renommierte 1 +▁verschärft 1 +▁Überarbeitung 1 +▁Aufklärung 1 +▁ansonsten 1 +▁fühle 1 +▁operative 1 +▁beseitigt 1 +▁motiviert 1 +▁bescheiden 1 +▁blind 1 +▁Turnier 1 +kündigt 1 +▁Integrität 1 +▁verwalten 1 +§ 1 +▁Erdöl 1 +▁trocken 1 +▁wählt 1 +erfahrung 1 +▁Illusion 1 +▁optimiert 1 +▁AIDS 1 +▁Flagge 1 +▁jeweilige 1 +▁abzielen 1 +▁Frucht 1 +▁ernannt 1 +▁muslimische 1 +▁Governance 1 +▁Protein 1 +й 1 +▁identifizieren 1 +▁ewig 1 +konflikt 1 +▁Zeichnung 1 +▁Anleger 1 +▁Kanäle 1 +▁gesundheitliche 1 +wärme 1 +€ 1 +ografie 1 +▁Korea 1 +▁which 1 +▁Freuen 1 +▁gefolgt 1 +▁Kohlenstoff 1 +▁Swiss 1 +infrastruktur 1 +▁finnische 1 +▁Netto 1 +Gestatten 1 +▁korrigieren 1 +▁zeitgenössische 1 +▁Klinik 1 +Commerce 1 +streifen 1 +angehörige 1 +▁Köpfe 1 +▁Hotelsafe 1 +bearbeitung 1 +▁erfunden 1 +▁liebt 1 +▁Schwellenländer 1 +▁Adobe 1 +verantwortlich 1 +vorsitzende 1 +▁Indonesien 1 +▁Schokolade 1 +▁jüdische 1 +▁Ökonomie 1 +erlebnis 1 +▁abzielt 1 +▁Facebook 1 +▁Sorgfalt 1 +▁versprochen 1 +▁Optimierung 1 +szeitraum 1 +▁Schlußfolgerung 1 +▁bewaffnete 1 +▁lustig 1 +▁töten 1 +▁auszuüben 1 +wörter 1 +Bild 1 +▁Laptop 1 +▁Mallorca 1 +▁akzeptabel 1 +▁Erfordernisse 1 +· 1 +▁potentiell 1 +▁Chinesen 1 +▁Materie 1 +Engine 1 +▁Folie 1 +schöpfen 1 +▁Budapest 1 +▁profitiert 1 +▁Periode 1 +▁Gemäß 1 +▁Ernennung 1 +▁Kloster 1 +▁klinische 1 +▁aktualisieren 1 +▁tödlich 1 +▁vertraulich 1 +▁Münz 1 +▁Kohärenz 1 +▁empfiehlt 1 +▁äußert 1 +▁Reihenfolge 1 +▁durfte 1 +▁Tempel 1 +▁Zuhause 1 +▁flach 1 +Karte 1 
+▁breakfast 1 +▁erfreulich 1 +▁Ideologie 1 +praxis 1 +▁blockiert 1 +▁Schauspieler 1 +Preis 1 +erkennung 1 +▁Einfluß 1 +▁Millennium 1 +▁Privileg 1 +▁zwangsläufig 1 +▁Gummi 1 +flücht 1 +Partner 1 +▁eindrucksvoll 1 +aufrechterhalten 1 +▁Kabine 1 +▁familiär 1 +▁Muslime 1 +▁keinesfalls 1 +▁dünn 1 +▁LateRooms 1 +▁Albanien 1 +▁Annäherung 1 +▁Behinderte 1 +▁Evaluierung 1 +▁Molekül 1 +▁Tunesien 1 +▁Quartal 1 +Christdemokraten 1 +▁Liege 1 +ý 1 +▁verschwunden 1 +ć 1 +▁Teufel 1 +▁einzubeziehen 1 +▁äußere 1 +▁College 1 +▁Effektivität 1 +▁Alpha 1 +▁Komplettpreis 1 +▁Assoziierung 1 +▁Sauerstoff 1 +▁Thailand 1 +▁gescheitert 1 +▁Bezirk 1 +▁Könnte 1 +▁hübsch 1 +▁Befreiung 1 +schmelz 1 +Automat 1 +▁Befürchtung 1 +▁aggressiv 1 +▁erforschen 1 +▁berühmt 1 +ière 1 +▁Legitimität 1 +▁Nichtregierungs 1 +belastung 1 +computer 1 +Haushalt 1 +▁Kalifornien 1 +▁Träger 1 +▁strafrechtlich 1 +▁unberührt 1 +▁größtenteils 1 +▁Animation 1 +▁Content 1 +▁verstoßen 1 +gesteckt 1 +zusammen 1 +▁Vielmehr 1 +▁zügig 1 +▁spätestens 1 +▁Neuigkeiten 1 +▁verfasst 1 +▁rief 1 +Ausnahmeregelung 1 +völker 1 +▁Föderation 1 +▁Erdgas 1 +style 1 +▁kriminelle 1 +▁Parallel 1 +▁feiern 1 +▁Surf 1 +▁Wikitravel 1 +б 1 +▁Toleranz 1 +▁beantragt 1 +▁Ängste 1 +geholt 1 +▁ideologisch 1 +dauerlicherweise 1 +▁Cocktail 1 +▁Errungenschaft 1 +▁koordinieren 1 +▁eigenständige 1 +▁Spalte 1 +▁gelb 1 +▁Simbabwe 1 +▁fortgeschritten 1 +theorie 1 +▁Autonomie 1 +▁steuerliche 1 +ð 1 +ч 1 +▁Stockholm 1 +▁Vulkan 1 +▁Instabilität 1 +▁verschoben 1 +siedlung 1 +▁ausgebaut 1 +▁Saudi 1 +widrig 1 +▁Boutique 1 +▁Organismen 1 +▁kümmert 1 +▁Security 1 +script 1 +▁Puerto 1 +▁Emotionen 1 +clus 1 +▁Piazza 1 +▁Löhne 1 +▁primär 1 +Gleichbehandlung 1 +Protokoll 1 +ı 1 +▁vorzubereiten 1 +▁ausgeübt 1 +brüche 1 +Taste 1 +▁gesondert 1 +▁Prognose 1 +▁umstritten 1 +▁befreit 1 +schlepp 1 +▁Patient 1 +ysikalisch 1 +philosoph 1 +▁Implementierung 1 +▁komfortabel 1 +▁original 1 +▁männliche 1 +▁konventionelle 1 +▁bekräftigen 1 +hydr 1 +▁Verweis 1 +unwahrscheinlich 1 +fabrik 1 +volumen 1 +▁centre 1 +EWG 1 +▁Migranten 1 +▁verteidigt 1 +▁stehe 1 +▁Erneuerung 1 +▁Immunität 1 +blätter 1 +▁beweist 1 +▁Grundfreiheiten 1 +▁Central 1 +▁schickt 1 +wissenschaftler 1 +verbände 1 +▁spürbar 1 +▁gewohnt 1 +▁abzulehnen 1 +▁Twitter 1 +▁dahingehend 1 +▁Copyright 1 +▁stützt 1 +▁Übersetzer 1 +▁HTML 1 +▁optimistisch 1 +▁anstreben 1 +▁Louis 1 +Präsident 1 +reißen 1 +überwachung 1 +▁Network 1 +▁fortschrittlich 1 +▁Mahlzeit 1 +▁verbieten 1 +© 1 +▁konservativ 1 +▁stattfand 1 +▁geklärt 1 +▁verleiht 1 +point 1 +▁Schweine 1 +▁Hongkong 1 +▁Schottland 1 +▁makroökonomisch 1 +▁Joseph 1 +▁Schriftsteller 1 +▁Etappe 1 +läßlich 1 +▁unendlich 1 +▁verhandelt 1 +▁Nachweis 1 +▁Darlehen 1 +▁Kriterium 1 +▁beeinträchtigen 1 +▁unterliegt 1 +▁verkündet 1 +▁Niederlassung 1 +▁veranstaltet 1 +adresse 1 +▁Attraktionen 1 +▁Zertifizierung 1 +▁harmonisiert 1 +▁veranlasst 1 +▁Dunkel 1 +▁Rekord 1 +▁Hindernis 1 +antwortungsvolle 1 +▁Komplex 1 +▁Demokratische 1 +▁Gültigkeit 1 +▁Prototyp 1 +▁größtmögliche 1 +▁inspirieren 1 +▁Käse 1 +konzern 1 +machung 1 +▁Diejenigen 1 +▁Beendigung 1 +bäume 1 +▁katastrophal 1 +▁leistungsfähige 1 +▁verwirklicht 1 +▁Zubehör 1 +▁widmet 1 +▁bewahrt 1 +▁Herberge 1 +mikro 1 +ähnlich 1 +▁wöchentlich 1 +▁engagieren 1 +▁energisch 1 +▁studieren 1 +α 1 +▁Begrenzung 1 +▁Kernkraftwerk 1 +▁Saddam 1 +▁einschlägige 1 +▁versorgen 1 +beratung 1 +▁leistungsstarke 1 +▁unbegrenzt 1 +ufrechterhaltung 1 +farbig 1 +▁Koalition 1 +▁beachtet 1 +▁ausgeglichen 1 +▁streben 1 +▁Release 1 +▁namentlich 1 +▁Reichweite 1 +▁trinken 1 +▁selbständig 1 
+▁Korallen 1 +▁gedruckt 1 +▁wiederhole 1 +ě 1 +▁populär 1 +▁vorzuschlagen 1 +▁Buffet 1 +▁belastet 1 +▁Parlamentarier 1 +▁strukturiert 1 +▁erlangt 1 +firma 1 +▁milde 1 +▁Verschmutzung 1 +▁gratis 1 +▁Entspannen 1 +▁grösste 1 +garantie 1 +▁beunruhigend 1 +▁öfter 1 +▁bestraft 1 +▁unterstreicht 1 +ación 1 +▁weitreichende 1 +▁Komponente 1 +ń 1 +▁Vermeidung 1 +▁unabdingbar 1 +▁befriedigen 1 +▁Folglich 1 +▁Schließung 1 +▁identisch 1 +glücklicherweise 1 +▁anzuerkennen 1 +▁beschädigt 1 +▁hinzuzufügen 1 +▁Wohlergehen 1 +▁Fracht 1 +erhöhung 1 +gesandt 1 +wurf 1 +▁vorangegangenen 1 +▁monatlich 1 +▁Streben 1 +▁Ahnung 1 +▁Blatt 1 +konstruktion 1 +▁Stuttgart 1 +▁registrieren 1 +▁gemeldet 1 +▁anscheinend 1 +▁Verurteilung 1 +chancen 1 +▁Bündnis 1 +▁erholsame 1 +▁klimatisiert 1 +▁Fußgänger 1 +▁Science 1 +▁importiert 1 +▁beunruhigt 1 +▁Tunnel 1 +▁widerspiegelt 1 +▁konstant 1 +▁zugewiesen 1 +▁beauftragt 1 +▁Fragestunde 1 +▁Clinton 1 +▁übereinstimmen 1 +▁Beschaffung 1 +bedürftig 1 +▁Francisco 1 +▁robust 1 +▁unsichtbar 1 +Energieverbrauch 1 +Standard 1 +Konferenz 1 +Website 1 +▁beherrscht 1 +▁harmonisch 1 +▁sonnig 1 +▁clean 1 +▁Vergessen 1 +▁betreibt 1 +kolleg 1 +▁Begegnung 1 +▁Inanspruchnahme 1 +▁Südtirol 1 +▁Rentner 1 +▁symbolisch 1 +▁Daniel 1 +intensiv 1 +lekommunikations 1 +▁Corporate 1 +▁Stornierung 1 +▁voranzutreiben 1 +▁autonom 1 +▁Bewirtschaftung 1 +▁Jagd 1 +▁köstliche 1 +wirksam 1 +Meinungsäußerung 1 +▁Tschetschenien 1 +▁verweigert 1 +▁schweigen 1 +▁human 1 +manager 1 +Mitgliedsstaaten 1 +▁geschäftliche 1 +▁behindern 1 +▁gewerbliche 1 +▁versorgt 1 +▁Sudan 1 +inhaber 1 +▁Interessant 1 +х 1 +▁Feedback 1 +▁Gletscher 1 +▁Wachstumspakt 1 +▁Algerien 1 +▁geachtet 1 +▁heikle 1 +▁BMW 1 +▁Abweichung 1 +▁lebhaft 1 +public 1 +bewusstsein 1 +▁gemischt 1 +▁Positiv 1 +▁kämpft 1 +▁Segment 1 +▁Student 1 +▁Schwierigkeit 1 +▁North 1 +β 1 +— 1 +$ 1 +ř 1 +Š 1 +ę 1 +ò 1 +ø 1 +ë 1 +ο 1 +τ 1 +ň 1 +ц 1 +ε 1 +ι 1 +ж 1 +Č 1 +æ 1 +ï 1 +ş 1 +μ 1 +ā 1 +ą 1 +ν 1 +ĺ 1 +ŕ 1 +ù 1 +ğ 1 +† 1 +ю 1 +ś 1 +ш 1 +É 1 +ا 1 +ì 1 +κ 1 +ρ 1 +⁄ 1 +π 1 +σ 1 +ل 1 +λ 1 +ő 1 +ż 1 +~ 1 +ă 1 +œ 1 +Á 1 + 1 +û 1 +đ 1 +› 1 +В 1 +Å 1 +Р 1 +¿ 1 +υ 1 +^ 1 +£ 1 +‹ 1 +Ž 1 +ű 1 +ί 1 +ф 1 +ī 1 +→ 1 +щ 1 +η 1 +ن 1 +ς 1 +ό 1 +ů 1 +ر 1 +õ 1 +ي 1 +Ÿ 1 +± 1 +э 1 +ã 1 +¬ 1 +П 1 +− 1 +ά 1 +َ 1 +δ 1 +Ó 1 +ē 1 +م 1 +İ 1 +ď 1 +ή 1 +Ø 1 +و 1 +ت 1 +ї 1 +น 1 +ō 1 +ū 1 +К 1 +‟ 1 +γ 1 +А 1 +έ 1 +Ç 1 +ė 1 +ك 1 +‐ 1 +× 1 +า 1 +Í 1 +อ 1 +Н 1 +¡ 1 +¢ 1 +М 1 +่ 1 +ร 1 +ľ 1 +Ѓ 1 +ب 1 +θ 1 +س 1 +ع 1 +О 1 +د 1 +ω 1 +י 1 +÷ 1 +І 1 +Б 1 +ƒ 1 +Ś 1 +Т 1 +Ł 1 +є 1 +Л 1 +ţ 1 +أ 1 +ง 1 +À 1 +′ 1 +Д 1 +Ú 1 +ו 1 +ก 1 +Ż 1 +ِ 1 +เ 1 +ม 1 +ύ 1 +ר 1 +← 1 +χ 1 +้ 1 +ี 1 +¦ 1 +א 1 +ه 1 +Ё 1 +ť 1 +ź 1 +Ñ 1 +φ 1 +И 1 +ة 1 +ว 1 +Ґ 1 +ส 1 +ל 1 +ה 1 +Ý 1 +ÿ 1 +Љ 1 +خ 1 +ิ 1 +Е 1 +ّ 1 +ศ 1 +Ќ 1 +พ 1 +Ő 1 +ค 1 +ั 1 +ะ 1 +њ 1 +‡ 1 +ف 1 +Њ 1 +ด 1 +У 1 +̈ 1 +ב 1 +Ð 1 +З 1 +¥ 1 +‒ 1 +年 1 +ў 1 +ע 1 +ห 1 +년 1 +ท 1 +Ş 1 +̧ 1 +ج 1 +► 1 +Æ 1 +ح 1 +È 1 +Î 1 +※ 1 +Α 1 +ล 1 +Ф 1 +Ω 1 +ώ 1 +Я 1 +ъ 1 +ش 1 +ص 1 +Э 1 +ื 1 +Ê 1 +ņ 1 +ё 1 +ת 1 +ย 1 +ุ 1 +ข 1 +إ 1 +● 1 +ϋ 1 +ξ 1 +ט 1 +þ 1 +ק 1 +ζ 1 +ق 1 +บ 1 +Ń 1 +Ą 1 +، 1 +ْ 1 +Ε 1 +ป 1 +ณ 1 +‰ 1 +ļ 1 +ד 1 +ى 1 +Η 1 +日 1 +ใ 1 +צ 1 +Đ 1 +Π 1 +פ 1 +ต 1 +‛ 1 +Х 1 +מ 1 +­ 1 +Μ 1 +ש 1 +ُ 1 +』 1 +Ш 1 +ѓ 1 +ķ 1 +ם 1 +⇒ 1 +ض 1 +Ι 1 +上 1 +本 1 +็ 1 +ј 1 +Ў 1 +Ď 1 +Τ 1 +Ô 1 +Ě 1 +↑ 1 +√ 1 +和 1 +Ч 1 +Þ 1 +Ї 1 +食 1 +で 1 +ู 1 +แ 1 +ํ 1 +จ 1 +Є 1 +ช 1 +Κ 1 +Œ 1 +Ο 1 +ѕ 1 +ן 1 +ط 1 +ა 1 +Ľ 1 +Ř 1 +Δ 1 +Ц 1 +غ 1 +ー 1 +す 1 +♫ 1 +ไ 1 +Џ 1 +Σ 1 +נ 1 +Ć 1 +ز 1 +ی 1 +、 1 +【 1 +̋ 1 +Ν 1 +い 1 +。 1 +ґ 1 +Ę 1 +Ĺ 1 +Ō 1 +自 1 +̃ 1 +् 1 +Ė 1 +ʿ 1 +Γ 1 +Θ 1 +Ж 1 +ז 1 +ი 1 +ლ 1 +ረ 1 +】 1 +克 1 +顶 1 +ų 1 +三 
1 +< 1 +ג 1 +ス 1 +文 1 +የ 1 +て 1 +ผ 1 +寺 1 +በ 1 +来 1 +手 1 +球 1 +Š 1 +– 1 +š 1 +Ë 1 +ŏ 1 +ŭ 1 +Ų 1 +̊ 1 +Ј 1 +љ 1 +ء 1 +آ 1 +ث 1 +र 1 +ዓ 1 +ይ 1 +★ 1 +治 1 +Ă 1 +≥ 1 +Ò 1 +『 1 +新 1 +は 1 +Õ 1 +免 1 +疫 1 +博 1 +场 1 +的 1 +网 1 +ン 1 +し 1 +も 1 +ึ 1 +ธ 1 +გ 1 +ე 1 +რ 1 +ħ 1 +ǎ 1 +प 1 +出 1 +武 1 +Ï 1 +Ň 1 +Ů 1 +ː 1 +̤ 1 +ќ 1 +ң 1 +۱ 1 +० 1 +დ 1 +ო 1 +ს 1 +ገ 1 +ệ 1 +≪ 1 +≫ 1 +◎ 1 +♥ 1 +县 1 +天 1 +市 1 +東 1 +江 1 +白 1 +空 1 +蛋 1 +語 1 +语 1 +込 1 +青 1 +ǐ 1 +כ 1 +द 1 +ा 1 +コ 1 +ナ 1 +一 1 +中 1 +山 1 +鋝 1 +ቀ 1 +̄ 1 +む 1 +有 1 +不 1 +乐 1 +在 1 +娱 1 +正 1 +赌 1 +ま 1 +か 1 +た 1 +っ 1 +く 1 +് 1 +Φ 1 +を 1 +チ 1 +マ 1 +・ 1 +Ī 1 +Ğ 1 +̍ 1 +ח 1 +ئ 1 +े 1 +鋍 1 +Ì 1 +ĕ 1 +Ū 1 +ơ 1 +ǔ 1 +̨ 1 +ً 1 +म 1 +ह 1 +ถ 1 +์ 1 +ຸ 1 +ህ 1 +ም 1 +ሻ 1 +ተ 1 +አ 1 +ኣ 1 +ው 1 +ጽ 1 +፣ 1 +ṭ 1 +ạ 1 +ế 1 +ễ 1 +↓ 1 +⇢ 1 +≈ 1 +■ 1 +◆ 1 +ち 1 +る 1 +イ 1 +オ 1 +テ 1 +ル 1 +了 1 +修 1 +分 1 +匿 1 +名 1 +吗 1 +敏 1 +木 1 +机 1 +站 1 +鋓 1 +问 1 +舁 1 +Ű 1 +ψ 1 +Ю 1 +ֳ 1 +仁 1 +水 1 +清 1 +石 1 +简 1 +谷 1 +እ 1 +ኦ 1 +宣 1 +መ 1 +ጨ 1 +人 1 +生 1 +さ 1 +ん 1 +お 1 +に 1 +ワ 1 +是 1 +キ 1 +や 1 +が 1 +つ 1 +と 1 +大 1 +屋 1 +โ 1 +ซ 1 +タ 1 +ә 1 +ภ 1 +ვ 1 +╚ 1 +ウ 1 +ユ 1 +体 1 +北 1 +龙 1 +Λ 1 +ћ 1 +ס 1 +ذ 1 +پ 1 +अ 1 +Ḥ 1 +振 1 +ḍ 1 +书 1 +小 1 +毛 1 +谢 1 +鰃 1 +Û 1 +Ĉ 1 +ĩ 1 +Į 1 +į 1 +ǵ 1 +ɑ 1 +ə 1 +ʒ 1 +ΐ 1 +Й 1 +Ъ 1 +ғ 1 +ָ 1 +ک 1 +ھ 1 +ۇ 1 +च 1 +ञ 1 +त 1 +ल 1 +श 1 +२ 1 +ন 1 +য 1 +় 1 +া 1 +૦ 1 +೦ 1 +೧ 1 +ษ 1 +ๆ 1 +ბ 1 +ნ 1 +პ 1 +ღ 1 +ხ 1 +ሕ 1 +ር 1 +ቶ 1 +ክ 1 +ዲ 1 +ả 1 +ổ 1 +ớ 1 +ῆ 1 +▪ 1 +▼ 1 +◊ 1 +○ 1 +♣ 1 +➲ 1 +あ 1 +う 1 +じ 1 +だ 1 +ね 1 +へ 1 +み 1 +め 1 +よ 1 +サ 1 +ジ 1 +ダ 1 +ツ 1 +ド 1 +ニ 1 +ヌ 1 +ハ 1 +ビ 1 +ベ 1 +ペ 1 +ボ 1 +ミ 1 +メ 1 +ヨ 1 +二 1 +从 1 +你 1 +内 1 +刘 1 +剥 1 +危 1 +受 1 +后 1 +幹 1 +张 1 +微 1 +応 1 +思 1 +戸 1 +欢 1 +歌 1 +气 1 +测 1 +海 1 +港 1 +溥 1 +牌 1 +章 1 +線 1 +舖 1 +花 1 +见 1 +言 1 +过 1 +送 1 +遝 1 +都 1 +里 1 +际 1 +题 1 +黚 1 +이 1 +ッ 1 +ĉ 1 +ģ 1 +Ť 1 +⋅ 1 +け 1 +举 1 +德 1 +管 1 +箱 1 +舫 1 +鋜 1 +陵 1 +ɛ 1 +ḥ 1 +ṣ 1 +╩ 1 +こ 1 +伸 1 +原 1 +国 1 +深 1 +鋖 1 +Ρ 1 +─ 1 +东 1 +五 1 +应 1 +方 1 +西 1 +の 1 +デ 1 +フ 1 +ホ 1 +ラ 1 +リ 1 +所 1 +グ 1 +え 1 +き 1 +げ 1 +ら 1 +ろ 1 +見 1 +部 1 +Ţ 1 +Ћ 1 +ェ 1 +Χ 1 +ђ 1 +ү 1 +һ 1 +ব 1 +ക 1 +ദ 1 +ല 1 +വ 1 +ഷ 1 +ീ 1 +于 1 +依 1 +头 1 +庆 1 +挂 1 +火 1 +用 1 +至 1 +车 1 +重 1 +除 1 +Ġ 1 +ŷ 1 +೨ 1 +余 1 +其 1 +叫 1 +吴 1 +咬 1 +引 1 +扈 1 +才 1 +晏 1 +牙 1 +紧 1 +跋 1 +ġ 1 +≤ 1 +゚ 1 +呀 1 +如 1 +届 1 +岩 1 +损 1 +澤 1 +続 1 +臺 1 +舩 1 +ở 1 +Ù 1 +Ā 1 +ĝ 1 +ĥ 1 +ĵ 1 +ĸ 1 +Ņ 1 +Ŝ 1 +ŝ 1 +Ź 1 +ǒ 1 +Ǻ 1 +ț 1 +ɔ 1 +ɡ 1 +ʐ 1 +ˆ 1 +Ά 1 +Ί 1 +Ό 1 +Ώ 1 +Β 1 +Υ 1 +ϊ 1 +Ѕ 1 +ұ 1 +Ӓ 1 +ө 1 +؛ 1 +ؤ 1 +ـ 1 +ٌ 1 +ځ 1 +ڭ 1 +ण 1 +न 1 +ब 1 +ि 1 +ु 1 +१ 1 +३ 1 +ই 1 +র 1 +হ 1 +ি 1 +ু 1 +ো 1 +্ 1 +૧ 1 +૨ 1 +೫ 1 +೬ 1 +೯ 1 +പ 1 +ญ 1 +ฎ 1 +ฝ 1 +ฟ 1 +฿ 1 +ຄ 1 +ງ 1 +ດ 1 +ຖ 1 +ນ 1 +ມ 1 +ວ 1 +ັ 1 +ስ 1 +ቐ 1 +ት 1 +Ṭ 1 +ẋ 1 +ẩ 1 +ậ 1 +ề 1 +ồ 1 +ộ 1 +ụ 1 +ủ 1 +ỹ 1 +ἰ 1 +₤ 1 +∞ 1 +█ 1 +◇ 1 +◈ 1 +☆ 1 +☑ 1 +☼ 1 +♀ 1 +♂ 1 +♦ 1 +」 1 +〜 1 +ぃ 1 +ぎ 1 +ぐ 1 +ご 1 +ざ 1 +ず 1 +せ 1 +ぜ 1 +そ 1 +ぞ 1 +ど 1 +な 1 +ぬ 1 +ば 1 +ひ 1 +び 1 +ふ 1 +ぶ 1 +べ 1 +ほ 1 +ぼ 1 +ゆ 1 +り 1 +れ 1 +わ 1 +ア 1 +ィ 1 +エ 1 +カ 1 +ガ 1 +ギ 1 +ク 1 +ケ 1 +ゲ 1 +ゴ 1 +ザ 1 +シ 1 +セ 1 +ゼ 1 +ソ 1 +ゾ 1 +ト 1 +ネ 1 +ノ 1 +バ 1 +パ 1 +ヒ 1 +ピ 1 +ブ 1 +プ 1 +ヘ 1 +ポ 1 +ム 1 +モ 1 +ヤ 1 +レ 1 +ロ 1 +ヴ 1 +ㄤ 1 +七 1 +万 1 +丈 1 +下 1 +义 1 +习 1 +事 1 +京 1 +仪 1 +仲 1 +价 1 +会 1 +但 1 +何 1 +倍 1 +儀 1 +儉 1 +光 1 +公 1 +刨 1 +則 1 +剣 1 +务 1 +动 1 +勧 1 +区 1 +去 1 +参 1 +及 1 +取 1 +只 1 +可 1 +台 1 +吃 1 +向 1 +君 1 +吧 1 +吹 1 +吾 1 +告 1 +喜 1 +嘛 1 +四 1 +回 1 +囧 1 +固 1 +國 1 +堂 1 +墩 1 +央 1 +好 1 +娃 1 +子 1 +孴 1 +宝 1 +客 1 +家 1 +寨 1 +寶 1 +寸 1 +尔 1 +局 1 +岭 1 +崩 1 +川 1 +希 1 +広 1 +庚 1 +弁 1 +彭 1 +役 1 +必 1 +怒 1 +怡 1 +性 1 +意 1 +慢 1 +成 1 +戴 1 +抜 1 +探 1 +接 1 +掻 1 +握 1 +搞 1 +摂 1 +撮 1 +放 1 +施 1 +昇 1 +星 1 +春 1 +显 1 +普 1 +曌 1 +曝 1 +書 1 +最 1 +板 1 +查 1 +柱 1 +桂 1 +检 1 +楽 1 +檀 1 +次 1 +止 1 +步 1 +気 1 +汉 1 +没 1 +泥 1 +注 1 +泽 1 +洛 1 +活 1 +浦 
1 +済 1 +満 1 +漢 1 +焼 1 +煮 1 +爱 1 +父 1 +片 1 +率 1 +玉 1 +王 1 +班 1 +琢 1 +畢 1 +畿 1 +疆 1 +疑 1 +百 1 +皇 1 +直 1 +相 1 +眼 1 +瞎 1 +知 1 +确 1 +示 1 +礼 1 +神 1 +祿 1 +福 1 +秀 1 +竞 1 +端 1 +竹 1 +第 1 +答 1 +紅 1 +終 1 +統 1 +纹 1 +细 1 +统 1 +维 1 +罗 1 +群 1 +義 1 +羽 1 +耀 1 +胞 1 +能 1 +臘 1 +臨 1 +致 1 +舐 1 +航 1 +葉 1 +葱 1 +蒸 1 +蔚 1 +藤 1 +街 1 +视 1 +觉 1 +訓 1 +記 1 +請 1 +许 1 +诘 1 +请 1 +调 1 +貌 1 +貮 1 +货 1 +质 1 +赤 1 +赵 1 +超 1 +足 1 +軍 1 +辱 1 +迎 1 +返 1 +连 1 +迷 1 +道 1 +遭 1 +郑 1 +鄭 1 +酉 1 +鋘 1 +鋟 1 +镜 1 +閩 1 +闽 1 +阳 1 +陀 1 +降 1 +陶 1 +電 1 +静 1 +音 1 +预 1 +飞 1 +飼 1 +馬 1 +鲁 1 +鵜 1 +黄 1 +黨 1 +검 1 +고 1 +군 1 +나 1 +누 1 +님 1 +단 1 +당 1 +드 1 +맨 1 +반 1 +번 1 +법 1 +베 1 +별 1 +빛 1 +성 1 +스 1 +신 1 +에 1 +왕 1 +요 1 +유 1 +자 1 +작 1 +조 1 +짝 1 +천 1 +추 1 +터 1 +̇ 1 +入 1 +凄 1 +千 1 +吳 1 +实 1 +康 1 +彰 1 +旨 1 +森 1 +睦 1 +苑 1 +蔓 1 +関 1 +鰈 1 +鰊 1 +鲞 1 +鹿 1 +ˈ 1 +Ẳ 1 +丘 1 +井 1 +今 1 +圆 1 +安 1 +明 1 +李 1 +甜 1 +田 1 +羅 1 +茶 1 +覺 1 +雄 1 +鴻 1 +대 1 +르 1 +체 1 +층 1 +ʀ 1 +愛 1 +无 1 +产 1 +住 1 +反 1 +場 1 +景 1 +济 1 +益 1 +种 1 +经 1 +而 1 +行 1 +非 1 +力 1 +学 1 +常 1 +朝 1 +留 1 +Ђ 1 +џ 1 +湖 1 +綺 1 +麗 1 + 1 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_de/spm_unigram10000.model b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/spm_unigram10000.model new file mode 100644 index 0000000000000000000000000000000000000000..213639dda0b5041bde005f998e5bf9e22328884b --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_de/spm_unigram10000.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2122cbc33ff8fd4c0e25829d02365b869fb46b21099a009e54a34b352069a4a +size 406948 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_es/config.yaml b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dce5f63011a8c33a4d12eec569fdcc91ea299f68 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/config.yaml @@ -0,0 +1,3 @@ +vocab_filename: dict.spm.txt +src_vocab_filename: dict.kmu.txt + diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_es/config_enes.yaml b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/config_enes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd080a05500211cade57d80056c8ce311ce4c0c2 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/config_enes.yaml @@ -0,0 +1,14 @@ +bpe_tokenizer: + bpe: sentencepiece + sentencepiece_model: spm_unigram10000.model + +sampling_alpha: 1.0 +shuffle: false +use_audio_input: true +use_sample_rate: 16000 + +vocab_filename: dict.spm.txt + +# required by speech_to_text task but never used +input_channels: 1 +input_feat_per_channel: 1 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_es/dict.kmu.txt b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/dict.kmu.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/dict.kmu.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 
106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 409 +410 410 +411 411 +412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 +447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 +487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git 
a/SpeechT5/SpeechUT/dataset/MuSTC/en_es/dict.spm.txt b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/dict.spm.txt new file mode 100644 index 0000000000000000000000000000000000000000..194ae6f610da4c2ec1975ba3aa9f45fe527c98c3 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/dict.spm.txt @@ -0,0 +1,9997 @@ +▁de 1 +, 1 +▁la 1 +. 1 +▁y 1 +▁en 1 +▁que 1 +▁el 1 +s 1 +▁a 1 +▁los 1 +▁las 1 +▁del 1 +▁se 1 +▁para 1 +▁con 1 +▁un 1 +▁por 1 +n 1 +▁una 1 +▁no 1 +▁su 1 +▁al 1 +▁es 1 +▁( 1 +r 1 +▁sobre 1 +) 1 +▁El 1 +▁como 1 +▁o 1 +▁ 1 +▁lo 1 +▁La 1 +▁más 1 +▁En 1 +es 1 +▁ha 1 +: 1 +do 1 +a 1 +; 1 +▁sus 1 +▁A 1 +o 1 +▁Naciones 1 +▁Unidas 1 +da 1 +▁entre 1 +▁Estados 1 +▁este 1 +/ 1 +se 1 +mente 1 +▁Comisión 1 +▁también 1 +▁e 1 +▁países 1 +▁desarrollo 1 +▁General 1 +mos 1 +▁Consejo 1 +▁si 1 +▁esta 1 +▁han 1 +▁Y 1 +▁contra 1 +▁son 1 +ndo 1 +▁derechos 1 +▁todos 1 +e 1 +- 1 +▁in 1 +ción 1 +▁informe 1 +▁Comité 1 +▁" 1 +▁está 1 +▁Se 1 +▁No 1 +á 1 +▁ser 1 +l 1 +▁re 1 +▁parte 1 +▁internacional 1 +▁Los 1 +? 1 +la 1 +▁período 1 +▁personas 1 +▁me 1 +▁información 1 +dos 1 +▁¿ 1 +▁todo 1 +▁medidas 1 +ba 1 +▁años 1 +▁resolución 1 +das 1 +t 1 +▁así 1 +▁mi 1 +▁pero 1 +▁Estado 1 +▁derecho 1 +A 1 +▁humanos 1 +▁puede 1 +▁programa 1 +▁Por 1 +ó 1 +▁sin 1 +▁le 1 +▁muy 1 +▁otros 1 +▁hacer 1 +▁sesiones 1 +C 1 +▁actividades 1 +▁artículo 1 +ta 1 +ra 1 +lo 1 +en 1 +▁Asamblea 1 +▁cuando 1 +▁ese 1 +▁niños 1 +▁1 1 +y 1 +▁proyecto 1 +▁trabajo 1 +▁Presidente 1 +▁Gobierno 1 +▁respecto 1 +▁aplicación 1 +▁miembros 1 +▁fin 1 +▁sistema 1 +▁“ 1 +▁dos 1 +d 1 +▁forma 1 +▁- 1 +le 1 +▁vez 1 +▁todas 1 +▁tiene 1 +▁Secretario 1 +▁seguridad 1 +▁país 1 +), 1 +ía 1 +▁otras 1 +to 1 +). 1 +▁Las 1 +ca 1 +te 1 +▁mundo 1 +ron 1 +▁mujeres 1 +ar 1 +▁apoyo 1 +▁De 1 +i 1 +an 1 +on 1 +S 1 +ciones 1 +▁Grupo 1 +▁están 1 +▁recursos 1 +▁hay 1 +▁hecho 1 +▁cada 1 +▁servicios 1 +▁Es 1 +c 1 +p 1 +de 1 +▁lugar 1 +u 1 +▁debe 1 +▁Convención 1 +▁esa 1 +▁situación 1 +▁cuenta 1 +▁fue 1 +▁ya 1 +▁2 1 +▁particular 1 +ma 1 +▁durante 1 +▁tiempo 1 +▁había 1 +▁desde 1 +é 1 +▁programas 1 +▁internacionales 1 +▁organizaciones 1 +▁importante 1 +▁proceso 1 +▁tanto 1 +▁cooperación 1 +▁era 1 +▁esto 1 +▁pueden 1 +no 1 +▁c 1 +co 1 +▁Si 1 +▁Internacional 1 +▁República 1 +di 1 +▁política 1 +▁porque 1 +▁S 1 +▁Seguridad 1 +▁manera 1 +▁3 1 +▁asistencia 1 +er 1 +▁paz 1 +▁nacional 1 +▁párrafo 1 +▁relación 1 +▁Unión 1 +nte 1 +re 1 +" 1 +▁b 1 +▁Pero 1 +▁gran 1 +▁cuestiones 1 +▁Con 1 +▁año 1 +▁nos 1 +sa 1 +▁bien 1 +▁sido 1 +▁general 1 +▁nivel 1 +▁C 1 +▁I 1 +▁cuestión 1 +les 1 +▁pro 1 +▁mayor 1 +▁Al 1 +▁uno 1 +▁vida 1 +ti 1 +▁protección 1 +▁hasta 1 +▁Sr 1 +▁personal 1 +▁caso 1 +▁sólo 1 +▁E 1 +▁tienen 1 +án 1 +▁estos 1 +▁Derechos 1 +▁mismo 1 +▁nacionales 1 +P 1 +me 1 +rá 1 +▁presente 1 +▁P 1 +▁Europea 1 +g 1 +▁19 1 +rse 1 +▁menos 1 +m 1 +▁hace 1 +ría 1 +▁esos 1 +ce 1 +▁Conferencia 1 +ci 1 +cu 1 +▁eso 1 +L 1 +▁O 1 +▁millones 1 +▁marco 1 +ro 1 +▁acuerdo 1 +▁ver 1 +▁dólares 1 +▁conformidad 1 +í 1 +▁número 1 +▁posible 1 +▁salud 1 +na 1 +▁social 1 +ban 1 +▁haya 1 +ación 1 +▁ejemplo 1 +▁armas 1 +▁qué 1 +▁sea 1 +▁mediante 1 +▁10 1 +▁di 1 +▁políticas 1 +b 1 +▁4 1 +▁tema 1 +▁datos 1 +▁atención 1 +▁antes 1 +▁capacidad 1 +▁ni 1 +▁ante 1 +” 1 +▁les 1 +▁Así 1 +que 1 +▁decisión 1 +E 1 +▁partes 1 +▁cosas 1 +▁labor 1 +▁esas 1 +▁embargo 1 +ga 1 +▁5 1 +▁algo 1 +▁materia 1 +ten 1 +▁estas 1 +li 1 +▁tener 1 +▁ahora 1 +▁algunos 1 +▁tres 1 +▁Especial 1 +▁ex 1 +▁deben 1 +▁otra 1 +idad 1 +▁esfuerzos 1 +so 1 +▁mejor 1 +▁examen 1 +▁trata 1 +▁otro 1 +▁decir 1 +z 1 +▁nuestra 1 +▁También 1 +▁mujer 1 +▁nuevo 1 +▁comunidad 1 +▁medio 1 
+▁pre 1 +▁acceso 1 +▁condiciones 1 +go 1 +▁donde 1 +al 1 +▁base 1 +▁diciembre 1 +▁tipo 1 +▁nuestro 1 +je 1 +vi 1 +B 1 +▁Además 1 +▁mucho 1 +▁Oficina 1 +▁casos 1 +▁Re 1 +▁objetivos 1 +▁problemas 1 +▁Programa 1 +▁después 1 +▁Sin 1 +▁6 1 +▁educación 1 +▁Desarrollo 1 +nos 1 +mi 1 +▁toda 1 +▁15 1 +▁Para 1 +▁Parte 1 +▁te 1 +in 1 +ja 1 +▁instituciones 1 +mo 1 +▁día 1 +▁co 1 +ri 1 +▁Unidos 1 +▁informes 1 +▁Humanos 1 +▁momento 1 +▁importancia 1 +▁B 1 +pa 1 +▁sino 1 +▁primera 1 +▁12 1 +D 1 +▁Europa 1 +▁responsabilidad 1 +▁cualquier 1 +▁tan 1 +▁Me 1 +▁especial 1 +▁ellos 1 +▁participación 1 +▁cual 1 +▁grupos 1 +▁7 1 +▁África 1 +▁normas 1 +▁Esta 1 +el 1 +▁población 1 +▁necesidad 1 +ve 1 +▁reunión 1 +▁va 1 +T 1 +tra 1 +". 1 +▁problema 1 +▁disposiciones 1 +▁sector 1 +▁ley 1 +tu 1 +▁— 1 +▁8 1 +ne 1 +▁grupo 1 +▁violencia 1 +▁Mundial 1 +▁documento 1 +▁primer 1 +▁Organización 1 +▁persona 1 +▁sesión 1 +▁sociedad 1 +▁uso 1 +▁muchos 1 +▁ca 1 +▁solo 1 +▁d 1 +▁Parlamento 1 +▁Ley 1 +▁resultados 1 +h 1 +▁ayuda 1 +▁recomendaciones 1 +un 1 +os 1 +▁dar 1 +▁mejorar 1 +▁Este 1 +ado 1 +▁gente 1 +▁lograr 1 +▁he 1 +O 1 +▁aquí 1 +▁In 1 +▁tal 1 +k 1 +▁nota 1 +▁20 1 +▁mundial 1 +▁necesario 1 +▁F 1 +▁necesidades 1 +▁9 1 +▁cómo 1 +▁debería 1 +▁horas 1 +▁gestión 1 +▁nombre 1 +▁Lo 1 +▁fecha 1 +bi 1 +▁Partes 1 +▁objetivo 1 +M 1 +si 1 +tar 1 +G 1 +▁región 1 +▁Miembros 1 +▁plan 1 +▁14 1 +▁proyectos 1 +▁18 1 +▁presupuesto 1 +▁Como 1 +▁inter 1 +▁11 1 +los 1 +▁Trabajo 1 +va 1 +▁lucha 1 +▁ello 1 +f 1 +▁control 1 +las 1 +▁promover 1 +00 1 +▁poder 1 +▁Tribunal 1 +2 1 +as 1 +▁hemos 1 +tas 1 +▁Mi 1 +▁podría 1 +▁tenemos 1 +▁especialmente 1 +▁G 1 +▁incluso 1 +▁civil 1 +Risas 1 +qui 1 +▁nuestros 1 +▁garantizar 1 +▁Ma 1 +▁discriminación 1 +▁regionales 1 +▁cambio 1 +F 1 +▁medida 1 +res 1 +▁dentro 1 +▁crear 1 +▁13 1 +▁acerca 1 +v 1 +tos 1 +▁debate 1 +▁sentido 1 +ter 1 +▁hoy 1 +▁estaba 1 +▁junio 1 +▁pobreza 1 +▁mayoría 1 +ir 1 +.1 1 +▁hacia 1 +▁virtud 1 +▁total 1 +▁plazo 1 +▁algunas 1 +▁propuesta 1 +x 1 +▁estado 1 +▁aún 1 +▁papel 1 +▁sociales 1 +▁medios 1 +▁favor 1 +▁respuesta 1 +▁haber 1 +▁dice 1 +▁modo 1 +▁[ 1 +▁decisiones 1 +▁p 1 +▁sí 1 +... 
1 +▁per 1 +bo 1 +▁30 1 +▁principios 1 +▁organización 1 +U 1 +▁autoridades 1 +▁terrorismo 1 +▁práctica 1 +▁futuro 1 +▁adoptar 1 +▁Su 1 +▁podemos 1 +I 1 +▁evaluación 1 +▁empleo 1 +is 1 +▁anexo 1 +▁objeto 1 +▁habían 1 +▁nuevas 1 +▁acción 1 +▁organismos 1 +▁calidad 1 +▁pertinentes 1 +tó 1 +] 1 +▁delegación 1 +▁mo 1 +▁investigación 1 +▁punto 1 +▁cuanto 1 +pi 1 +▁cabo 1 +▁examinar 1 +▁productos 1 +ria 1 +gra 1 +▁mandato 1 +▁nosotros 1 +dor 1 +ble 1 +▁16 1 +▁Esto 1 +▁establecer 1 +▁Una 1 +▁poco 1 +▁siempre 1 +▁zonas 1 +▁Un 1 +▁empresas 1 +▁bajo 1 +ven 1 +▁según 1 +▁órganos 1 +▁creación 1 +▁nueva 1 +ida 1 +▁legislación 1 +▁tu 1 +▁orden 1 +▁ma 1 +▁julio 1 +▁relativas 1 +▁público 1 +▁realidad 1 +▁será 1 +▁Social 1 +ada 1 +▁Nacional 1 +▁seguir 1 +▁M 1 +▁aumento 1 +▁libertad 1 +▁tecnología 1 +▁estamos 1 +▁vi 1 +▁Señor 1 +▁Ha 1 +▁importantes 1 +▁septiembre 1 +gi 1 +▁principales 1 +▁yo 1 +▁ejecución 1 +▁fondos 1 +▁sostenible 1 +ú 1 +▁segundo 1 +▁falta 1 +▁17 1 +▁días 1 +▁misma 1 +▁además 1 +▁realmente 1 +▁diferentes 1 +▁mercado 1 +▁principio 1 +▁pública 1 +▁siguientes 1 +▁Declaración 1 +ntes 1 +▁agua 1 +▁gobiernos 1 +▁representantes 1 +▁financiación 1 +▁Protocolo 1 +▁económica 1 +fi 1 +▁declaración 1 +▁contexto 1 +▁función 1 +▁través 1 +▁u 1 +▁igualdad 1 +ur 1 +▁frente 1 +pe 1 +▁arreglo 1 +▁pa 1 +▁efectos 1 +▁D 1 +▁noviembre 1 +▁unos 1 +▁actual 1 +▁económico 1 +▁él 1 +▁sigue 1 +▁largo 1 +▁final 1 +▁comercio 1 +encia 1 +▁gubernamentales 1 +▁prácticas 1 +▁aplicar 1 +za 1 +▁nuevos 1 +iendo 1 +▁octubre 1 +dores 1 +▁Junta 1 +N 1 +ido 1 +▁gastos 1 +▁estar 1 +ch 1 +▁siendo 1 +▁muchas 1 +▁Nueva 1 +▁Fondo 1 +▁to 1 +▁tra 1 +▁establecimiento 1 +▁niño 1 +ncia 1 +▁funcionarios 1 +▁ciudadanos 1 +▁formas 1 +▁siguiente 1 +▁víctimas 1 +or 1 +▁justicia 1 +▁valor 1 +▁conflictos 1 +ían 1 +▁último 1 +▁fuera 1 +▁po 1 +rán 1 +▁posibilidad 1 +▁tras 1 +▁causa 1 +▁operaciones 1 +tro 1 +▁Asuntos 1 +▁figura 1 +▁espacio 1 +▁grandes 1 +▁equipo 1 +▁pregunta 1 +▁coordinación 1 +▁Secretaría 1 +▁cumplimiento 1 +▁sistemas 1 +▁interés 1 +▁ayudar 1 +▁Ministerio 1 +miento 1 +▁reuniones 1 +ru 1 +▁pasado 1 +fa 1 +tiva 1 +▁energía 1 +▁regional 1 +' 1 +▁nucleares 1 +▁representa 1 +▁ámbito 1 +▁promoción 1 +▁opinión 1 +ge 1 +jo 1 +▁pueblo 1 +▁fundamental 1 +▁poner 1 +▁Departamento 1 +ización 1 +fe 1 +▁edad 1 +▁realizar 1 +▁representante 1 +▁autor 1 +▁comunicación 1 +▁técnica 1 +▁crisis 1 +▁ra 1 +▁historia 1 +▁reforma 1 +▁Europeo 1 +▁procedimientos 1 +▁mantenimiento 1 +▁sean 1 +▁creo 1 +5 1 +▁resultado 1 +R 1 +▁pueda 1 +fer 1 +▁obligaciones 1 +vo 1 +▁marzo 1 +▁resoluciones 1 +▁compromiso 1 +6 1 +▁reducir 1 +▁carácter 1 +▁texto 1 +▁T 1 +▁tomar 1 +▁2005 1 +▁idea 1 +▁Informe 1 +▁veces 1 +emos 1 +▁fueron 1 +▁aumentar 1 +▁correspondiente 1 +▁puedan 1 +▁dicho 1 +▁propuestas 1 +▁mecanismos 1 +▁Israel 1 +▁Ca 1 +▁común 1 +ismo 1 +▁Ahora 1 +▁cuatro 1 +▁gracias 1 +▁policía 1 +▁lista 1 +▁actos 1 +▁locales 1 +rio 1 +▁abril 1 +▁considera 1 +▁ambiente 1 +▁meses 1 +▁serie 1 +▁familia 1 +▁instrumentos 1 +▁da 1 +▁género 1 +▁mayo 1 +lla 1 +▁Cuando 1 +▁2000 1 +▁solución 1 +▁conflicto 1 +▁toma 1 +▁jóvenes 1 +▁L 1 +ando 1 +▁amplia 1 +▁of 1 +man 1 +▁guerra 1 +H 1 +po 1 +3 1 +▁prevención 1 +▁mientras 1 +▁varios 1 +▁aspectos 1 +▁diálogo 1 +▁Carta 1 +▁Misión 1 +▁preocupación 1 +▁aunque 1 +▁ven 1 +▁zona 1 +1 1 +▁estrategia 1 +▁cinco 1 +▁obstante 1 +▁expertos 1 +▁vista 1 +il 1 +por 1 +con 1 +▁Pa 1 +▁i 1 +▁asegurar 1 +▁iniciativas 1 +▁China 1 +▁efecto 1 +ya 1 +▁conjunto 1 +▁Pacto 1 +▁ve 1 +sión 1 +▁puesto 1 +ados 1 +▁f 1 +▁capacitación 1 +▁demás 1 +▁fundamentales 1 +be 1 
+▁the 1 +▁entonces 1 +); 1 +▁experiencia 1 +▁sería 1 +▁servicio 1 +▁dijo 1 +▁facilitar 1 +▁24 1 +▁documentos 1 +▁alto 1 +▁América 1 +us 1 +▁2004 1 +cha 1 +▁transporte 1 +▁procedimiento 1 +sta 1 +▁cumplir 1 +▁UE 1 +▁pueblos 1 +▁encontrar 1 +▁enero 1 +▁2002 1 +▁Tratado 1 +▁Convenio 1 +▁25 1 +▁nuestras 1 +▁2001 1 +▁resulta 1 +▁hombres 1 +▁li 1 +ran 1 +4 1 +▁local 1 +▁Representante 1 +era 1 +▁debido 1 +▁relativa 1 +▁crecimiento 1 +▁21 1 +▁relaciones 1 +▁especiales 1 +▁territorio 1 +! 1 +ni 1 +▁últimos 1 +▁puestos 1 +▁producción 1 +nd 1 +▁libre 1 +▁estudio 1 +▁formación 1 +▁22 1 +▁2003 1 +V 1 +▁todavía 1 +▁oportunidad 1 +”. 1 +▁centro 1 +▁imp 1 +▁Acción 1 +per 1 +▁tenía 1 +▁funciones 1 +▁acuerdos 1 +▁celebrada 1 +▁colaboración 1 +▁Iraq 1 +▁luego 1 +▁establecido 1 +▁economía 1 +▁señor 1 +ista 1 +▁consecuencias 1 +▁Le 1 +▁consultas 1 +▁VIH 1 +ré 1 +▁proteger 1 +um 1 +▁relacionadas 1 +▁principal 1 +▁Ba 1 +▁leyes 1 +▁sa 1 +▁llevar 1 +▁saber 1 +▁llegar 1 +▁Hay 1 +▁eran 1 +▁Co 1 +▁mantener 1 +▁casi 1 +▁éxito 1 +.2 1 +▁dis 1 +▁reducción 1 +▁utilización 1 +▁indica 1 +▁quiero 1 +▁podrá 1 +▁ro 1 +▁referencia 1 +tal 1 +▁refugiados 1 +▁ninguna 1 +▁eficaz 1 +▁ciudad 1 +▁ella 1 +w 1 +▁Di 1 +▁2006 1 +▁adopción 1 +tado 1 +▁prestar 1 +▁R 1 +▁financieros 1 +▁enfoque 1 +▁gobierno 1 +▁presentar 1 +▁ejercicio 1 +gu 1 +▁aprobación 1 +▁bienes 1 +▁presentación 1 +▁suma 1 +▁consecuencia 1 +▁31 1 +▁hotel 1 +ha 1 +▁Te 1 +it 1 +▁deberían 1 +▁hizo 1 +/1 1 +▁plenamente 1 +▁Penal 1 +ente 1 +▁políticos 1 +▁comunidades 1 +▁comp 1 +▁23 1 +▁indígenas 1 +▁razón 1 +▁régimen 1 +▁ellas 1 +▁riesgo 1 +▁Artículo 1 +▁Sra 1 +▁espera 1 +adas 1 +▁difícil 1 +ros 1 +▁Pro 1 +▁buena 1 +", 1 +▁varias 1 +du 1 +cri 1 +▁fuerzas 1 +▁menores 1 +▁presentado 1 +▁administración 1 +▁determinar 1 +▁nada 1 +cia 1 +▁obtener 1 +▁cuales 1 +▁and 1 +▁generales 1 +▁temas 1 +▁participar 1 +lu 1 +ber 1 +ing 1 +▁cu 1 +▁ingresos 1 +▁2007 1 +”, 1 +▁alguna 1 +cen 1 +▁sub 1 +cción 1 +▁proporcionar 1 +▁Código 1 +st 1 +men 1 +ll 1 +▁utilizar 1 +▁inglés 1 +tion 1 +▁Mo 1 +▁pe 1 +▁igual 1 +ct 1 +▁parece 1 +▁significa 1 +▁Vi 1 +▁dado 1 +ita 1 +▁esfera 1 +): 1 +den 1 +▁ningún 1 +tor 1 +▁debemos 1 +▁ne 1 +▁delito 1 +▁niveles 1 +▁observa 1 +▁incluidos 1 +▁curso 1 +▁concepto 1 +com 1 +▁mar 1 +▁seguimiento 1 +▁He 1 +▁político 1 +▁par 1 +▁agosto 1 +CN 1 +Qué 1 +▁observaciones 1 +ck 1 +▁existe 1 +▁apoyar 1 +▁mis 1 +▁casa 1 +cer 1 +▁peligro 1 +▁teniendo 1 +▁competencia 1 +▁análisis 1 +▁Permanente 1 +▁alcanzar 1 +▁centros 1 +▁so 1 +ho 1 +▁auto 1 +amos 1 +▁trabajadores 1 +bra 1 +▁segunda 1 +▁U 1 +▁ir 1 +▁bo 1 +▁II 1 +▁negociaciones 1 +▁real 1 +▁sexual 1 +ieron 1 +▁nunca 1 +▁fa 1 +▁secretaría 1 +ren 1 +▁N 1 +▁asuntos 1 +istas 1 +▁g 1 +▁progresos 1 +▁regiones 1 +▁relacionados 1 +7 1 +▁provisional 1 +Aplausos 1 +▁hacerlo 1 +ec 1 +car 1 +cio 1 +▁cargo 1 +cho 1 +▁desea 1 +rra 1 +▁evitar 1 +▁Económico 1 +▁pi 1 +pre 1 +▁violaciones 1 +ón 1 +/2 1 +8 1 +▁dinero 1 +▁K 1 +▁pesar 1 +▁insta 1 +hi 1 +▁Sa 1 +ul 1 +▁haciendo 1 +▁artículos 1 +▁febrero 1 +▁respeto 1 +▁sal 1 +▁esferas 1 +▁necesita 1 +▁usted 1 +ver 1 +▁financiera 1 +enta 1 +▁iniciativa 1 +▁escala 1 +▁civiles 1 +▁Relator 1 +▁Reino 1 +▁Rusia 1 +▁disposición 1 +▁independiente 1 +▁Centro 1 +▁penal 1 +tes 1 +▁fines 1 +▁terreno 1 +▁• 1 +▁trabajar 1 +▁dispuesto 1 +▁Medio 1 +▁podrían 1 +▁red 1 +▁judicial 1 +véase 1 +▁26 1 +▁oportunidades 1 +▁tales 1 +▁ad 1 +▁amenaza 1 +▁Constitución 1 +▁fuerza 1 +▁relativo 1 +▁ofrece 1 +▁necesarias 1 +gue 1 +izar 1 +▁desarme 1 +ras 1 +tivas 1 +▁párr 1 +▁cerca 1 +tivo 1 +▁Milenio 1 +▁Corte 1 +▁Democrática 1 
+▁consiguiente 1 +tación 1 +▁miembro 1 +▁muerte 1 +▁estructura 1 +▁fondo 1 +▁delitos 1 +▁privado 1 +▁28 1 +▁Acuerdo 1 +▁claro 1 +▁pues 1 +▁aprobado 1 +▁hablar 1 +▁27 1 +▁EE 1 +▁Desde 1 +▁votación 1 +▁anterior 1 +▁seis 1 +▁adoptadas 1 +▁fortalecer 1 +▁2008 1 +▁Unido 1 +che 1 +▁lugares 1 +▁Plan 1 +▁mecanismo 1 +▁for 1 +▁misión 1 +▁tratamiento 1 +gre 1 +▁abordar 1 +Add 1 +▁enseñanza 1 +su 1 +nes 1 +▁incluida 1 +▁Mar 1 +▁Asia 1 +▁amplio 1 +SIDA 1 +▁Po 1 +▁estoy 1 +▁Esa 1 +▁palabras 1 +▁resolver 1 +▁sitio 1 +imos 1 +▁elecciones 1 +▁incluye 1 +▁Ro 1 +▁reconocimiento 1 +▁contribuciones 1 +▁Durante 1 +▁aprobó 1 +ró 1 +he 1 +UU 1 +ic 1 +▁niñas 1 +▁tratados 1 +idas 1 +rios 1 +▁cuyo 1 +▁señala 1 +▁recomienda 1 +▁planes 1 +sti 1 +▁propia 1 +▁partir 1 +▁actividad 1 +▁Cumbre 1 +cto 1 +▁Da 1 +th 1 +▁ba 1 +▁diversos 1 +mina 1 +▁elementos 1 +▁intereses 1 +▁Ministro 1 +tre 1 +▁junto 1 +.5 1 +▁York 1 +▁tenido 1 +siones 1 +▁elaboración 1 +▁sectores 1 +▁demanda 1 +ió 1 +j 1 +▁1999 1 +▁contenido 1 +▁presenta 1 +▁diversas 1 +▁único 1 +▁declaraciones 1 +▁eficacia 1 +▁militares 1 +▁quienes 1 +▁vigor 1 +entes 1 +▁Aunque 1 +▁miras 1 +▁completa 1 +ng 1 +ue 1 +▁instrumento 1 +▁incluir 1 +▁propio 1 +▁reserva 1 +▁tratar 1 +ac 1 +▁Tema 1 +▁necesarios 1 +▁empresa 1 +▁origen 1 +▁mí 1 +▁preparación 1 +nas 1 +▁escrito 1 +▁cuadro 1 +▁delegaciones 1 +▁recibir 1 +▁Paz 1 +▁constituye 1 +▁estudios 1 +▁estrategias 1 +▁bienio 1 +▁conocimientos 1 +▁central 1 +▁vol 1 +sas 1 +▁interna 1 +▁nuclear 1 +▁integración 1 +▁hijos 1 +▁Alto 1 +0.000 1 +▁militar 1 +▁supervisión 1 +▁Internet 1 +▁podía 1 +▁Ra 1 +▁estaban 1 +▁hacen 1 +▁car 1 +ana 1 +▁obligación 1 +▁situaciones 1 +▁pasa 1 +▁Asimismo 1 +9 1 +▁tortura 1 +▁permite 1 +▁familias 1 +▁H 1 +pu 1 +▁satisfacción 1 +▁supuesto 1 +min 1 +▁50 1 +ales 1 +▁entidades 1 +▁na 1 +▁afirma 1 +▁infraestructura 1 +▁escuelas 1 +▁pequeñas 1 +▁San 1 +▁posición 1 +mp 1 +▁fi 1 +▁funciona 1 +▁Congo 1 +▁misiones 1 +zo 1 +.4 1 +▁cierto 1 +10 1 +▁material 1 +▁superior 1 +▁sección 1 +▁económicos 1 +▁humana 1 +▁luz 1 +▁apoya 1 +gen 1 +▁línea 1 +▁Ho 1 +▁pide 1 +am 1 +ña 1 +▁apartado 1 +▁India 1 +▁cambios 1 +mar 1 +ces 1 +▁democracia 1 +▁ho 1 +▁global 1 +▁anual 1 +▁adoptado 1 +K 1 +ina 1 +▁usar 1 +ero 1 +nda 1 +▁capital 1 +io 1 +cos 1 +ed 1 +▁participantes 1 +▁juicio 1 +fo 1 +▁dirección 1 +▁palabra 1 +▁violación 1 +▁contribuir 1 +▁pena 1 +▁figuran 1 +▁intercambio 1 +▁solicitud 1 +▁mejores 1 +▁incluidas 1 +▁Sala 1 +▁alta 1 +▁comunicaciones 1 +▁cantidad 1 +▁última 1 +▁financieras 1 +▁suficiente 1 +▁titulado 1 +▁oficiales 1 +▁cultura 1 +▁actualmente 1 +▁graves 1 +▁trans 1 +▁oficial 1 +▁tenga 1 +▁allí 1 +▁firma 1 +▁universal 1 +ment 1 +▁marcha 1 +▁padres 1 +▁29 1 +▁industria 1 +▁asociados 1 +▁preocupa 1 +▁alcance 1 +▁invita 1 +gar 1 +▁respecta 1 +▁V 1 +ex 1 +▁diferencia 1 +▁tráfico 1 +cion 1 +▁siguen 1 +▁2000, 1 +▁pidió 1 +▁públicos 1 +▁mal 1 +▁carta 1 +▁Entonces 1 +▁condición 1 +▁relativos 1 +ye 1 +▁camino 1 +▁compromisos 1 +▁PNUD 1 +▁recomendación 1 +▁com 1 +▁semana 1 +▁alrededor 1 +0 1 +.3 1 +rre 1 +▁armados 1 +▁detención 1 +▁continuación 1 +wa 1 +▁Presidencia 1 +▁tribunales 1 +bilidad 1 +bles 1 +▁tengan 1 +llo 1 +▁tierra 1 +sen 1 +ka 1 +▁finales 1 +▁Federación 1 +▁expresa 1 +▁acciones 1 +▁Entre 1 +▁carga 1 +▁eliminar 1 +▁distintos 1 +▁funcionamiento 1 +▁circunstancias 1 +▁elaborar 1 +▁Justicia 1 +▁ri 1 +▁donantes 1 +▁man 1 +▁pensar 1 +▁entorno 1 +▁desarrollar 1 +habla 1 +▁tercer 1 +▁contar 1 +▁Li 1 +▁procesos 1 +▁Derecho 1 +▁refiere 1 +▁directrices 1 +▁van 1 +▁reglamento 1 +▁Afganistán 1 +▁Segú 1 +▁modelo 1 +▁hi 1 +dad 
1 +▁Creo 1 +art 1 +▁jurídico 1 +▁Bo 1 +▁seguro 1 +▁pobres 1 +▁dejar 1 +▁cosa 1 +tivos 1 +▁be 1 +▁esté 1 +▁existentes 1 +▁ce 1 +▁incluido 1 +▁dificultades 1 +▁comerciales 1 +aciones 1 +▁propiedad 1 +▁DE 1 +lle 1 +50 1 +ut 1 +▁drogas 1 +▁previsto 1 +▁considerar 1 +▁dispone 1 +▁razones 1 +▁res 1 +▁reconoce 1 +▁hu 1 +▁Todos 1 +▁adicionales 1 +▁naturales 1 +▁plena 1 +▁fe 1 +▁2005, 1 +▁establece 1 +rían 1 +▁deseo 1 +remos 1 +▁Administración 1 +tribu 1 +▁tecnologías 1 +▁única 1 +▁perspectiva 1 +▁paso 1 +▁mu 1 +▁ob 1 +▁cambiar 1 +▁interesados 1 +qué 1 +▁can 1 +ad 1 +▁estabilidad 1 +▁construcción 1 +▁humano 1 +▁eliminación 1 +▁W 1 +▁financiero 1 +ia 1 +▁confianza 1 +▁anti 1 +▁humanitaria 1 +▁materiales 1 +▁J 1 +tri 1 +▁quisiera 1 +▁efectiva 1 +▁anteriores 1 +dio 1 +▁cuya 1 +ton 1 +▁pruebas 1 +▁Canadá 1 +can 1 +▁– 1 +▁hombre 1 +▁oficinas 1 +té 1 +▁Japón 1 +tura 1 +▁Francia 1 +▁pequeños 1 +▁tasa 1 +▁prevenir 1 +▁económicas 1 +▁decidió 1 +▁tendrá 1 +▁opiniones 1 +▁contribución 1 +▁éste 1 +pen 1 +▁criterios 1 +▁determinación 1 +tera 1 +▁conseguir 1 +▁UNICEF 1 +▁hora 1 +▁queda 1 +▁producto 1 +▁dicha 1 +▁Banco 1 +▁conocimiento 1 +▁prevé 1 +▁verdad 1 +bu 1 +▁tribunal 1 +▁básicos 1 +▁culturales 1 +ol 1 +▁conclusiones 1 +cre 1 +▁entender 1 +▁Gracias 1 +▁Yo 1 +▁acoge 1 +▁encuentra 1 +▁Gran 1 +▁2001, 1 +AC 1 +▁climático 1 +▁quien 1 +▁profesional 1 +▁aplica 1 +▁2003, 1 +▁planificación 1 +▁conferencias 1 +▁40 1 +▁necesaria 1 +▁prioridades 1 +▁web 1 +▁tuvo 1 +ig 1 +▁dirigida 1 +Y 1 +▁60 1 +▁trabajos 1 +▁naturaleza 1 +▁recibido 1 +▁Eso 1 +▁presentó 1 +▁extra 1 +ario 1 +et 1 +▁siglo 1 +▁Comisionado 1 +▁escuela 1 +▁esencial 1 +▁tengo 1 +▁formular 1 +▁física 1 +▁presencia 1 +gan 1 +spe 1 +▁UN 1 +▁valores 1 +▁beneficios 1 +mento 1 +rias 1 +nu 1 +▁Países 1 +▁estima 1 +▁Presidenta 1 +▁proliferación 1 +mas 1 +▁señaló 1 +▁directamente 1 +▁transición 1 +▁posibles 1 +▁Mujer 1 +tan 1 +▁2004, 1 +▁comercial 1 +▁diseño 1 +▁mercados 1 +sto 1 +tica 1 +▁informar 1 +cur 1 +▁duda 1 +▁Norte 1 +▁reforzar 1 +▁2006, 1 +▁gra 1 +▁explotación 1 +▁Información 1 +▁entrada 1 +▁distribución 1 +▁2002, 1 +▁motivos 1 +▁permitir 1 +▁existen 1 +▁Ex 1 +▁Pe 1 +▁jurídica 1 +▁evaluar 1 +▁agentes 1 +▁transferencia 1 +nta 1 +15 1 +▁algún 1 +▁División 1 +▁combatir 1 +▁existencia 1 +▁independencia 1 +cas 1 +▁lleva 1 +izado 1 +▁europea 1 +▁fortalecimiento 1 +der 1 +▁cuerpo 1 +rro 1 +▁tipos 1 +▁asunto 1 +▁do 1 +▁exp 1 +▁Instituto 1 +▁Director 1 +▁fuerte 1 +ándose 1 +dica 1 +▁preparar 1 +▁puntos 1 +▁realizado 1 +▁serán 1 +▁clave 1 +▁fuentes 1 +▁fomentar 1 +nia 1 +▁Recordando 1 +at 1 +▁internos 1 +▁métodos 1 +▁mismos 1 +ctor 1 +▁infantil 1 +▁grado 1 +▁instalaciones 1 +▁bastante 1 +▁gustaría 1 +▁términos 1 +▁oficina 1 +▁Kosovo 1 +▁autoridad 1 +▁Na 1 +▁Pide 1 +▁preguntas 1 +▁cultural 1 +▁territorios 1 +▁alguien 1 +▁Ta 1 +▁Foro 1 +▁menudo 1 +▁pasar 1 +▁pesca 1 +▁señalar 1 +lí 1 +sos 1 +▁terroristas 1 +▁visto 1 +có 1 +La 1 +▁corre 1 +ste 1 +▁Véase 1 +fu 1 +W 1 +▁Ginebra 1 +▁habría 1 +▁Ka 1 +▁Sudán 1 +▁órgano 1 +▁Consultiva 1 +▁Niño 1 +▁fomento 1 +▁completo 1 +▁prioridad 1 +iera 1 +org 1 +▁motivo 1 +▁¡ 1 +▁distintas 1 +▁celebrar 1 +▁cuentas 1 +▁deberá 1 +lar 1 +▁refleja 1 +▁Ar 1 +xi 1 +▁defensa 1 +ke 1 +▁propone 1 +iza 1 +▁1997 1 +▁asimismo 1 +par 1 +▁extraordinario 1 +▁muestra 1 +▁claramente 1 +▁basa 1 +▁pu 1 +▁abierta 1 +▁próximo 1 +▁vigilancia 1 +▁llega 1 +▁permanente 1 +para 1 +▁venta 1 +ine 1 +▁discapacidad 1 +▁menor 1 +pare 1 +▁abarca 1 +▁baja 1 +nal 1 +▁tratado 1 +▁m 1 +stru 1 +ie 1 +▁solicita 1 +▁Bueno 1 +▁consulta 1 +▁México 1 +▁voluntad 1 +▁Alemania 1 
+▁palestino 1 +▁saben 1 +▁prestación 1 +▁responsables 1 +▁Ja 1 +▁Com 1 +▁deuda 1 +▁demasiado 1 +▁diferente 1 +▁conforme 1 +▁(19 1 +▁categoría 1 +sh 1 +▁cor 1 +rnos 1 +pon 1 +▁Ju 1 +nt 1 +▁tareas 1 +Cómo 1 +▁buen 1 +_ 1 +▁concretas 1 +▁depende 1 +▁miles 1 +▁inversión 1 +▁totalmente 1 +▁exige 1 +▁ocupa 1 +▁ciencia 1 +idades 1 +▁preciso 1 +▁humanitario 1 +▁enfermedades 1 +▁grave 1 +nto 1 +▁cerebro 1 +▁cuarto 1 +▁ustedes 1 +▁1996 1 +▁puedo 1 +▁progreso 1 +▁Comunidad 1 +▁visita 1 +son 1 +▁Irlanda 1 +▁costos 1 +▁integrado 1 +▁condena 1 +▁Oriente 1 +▁creado 1 +OS 1 +▁ésta 1 +▁enmiendas 1 +▁bu 1 +▁Mu 1 +▁precios 1 +▁costo 1 +able 1 +▁s 1 +▁prueba 1 +▁sanciones 1 +▁fácil 1 +cal 1 +▁pago 1 +▁primero 1 +▁2007, 1 +mb 1 +pro 1 +▁públicas 1 +▁fu 1 +ced 1 +▁Hotel 1 +▁riesgos 1 +▁Ya 1 +▁ideas 1 +▁adecuada 1 +▁debía 1 +▁alimentos 1 +tin 1 +▁du 1 +ki 1 +▁mayores 1 +▁Cooperación 1 +▁Sur 1 +▁Estos 1 +▁marca 1 +ou 1 +ud 1 +▁causas 1 +▁responder 1 +▁Proyecto 1 +▁máximo 1 +No 1 +▁directa 1 +cía 1 +▁simplemente 1 +▁pleno 1 +▁destrucción 1 +▁2009 1 +▁informa 1 +▁ahí 1 +oc 1 +5% 1 +ifica 1 +▁ofrecer 1 +land 1 +om 1 +▁mínimo 1 +▁1998 1 +dió 1 +▁Corea 1 +▁estén 1 +▁reciente 1 +▁Brasil 1 +▁Toma 1 +cida 1 +mba 1 +▁aprobar 1 +▁principalmente 1 +▁asesoramiento 1 +20 1 +rica 1 +▁mucha 1 +▁Cuba 1 +▁mejora 1 +▁tarea 1 +▁particularmente 1 +▁mano 1 +▁soluciones 1 +▁eficaces 1 +▁Turquía 1 +▁ti 1 +▁llamamiento 1 +▁Servicio 1 +puesta 1 +bor 1 +▁urgente 1 +▁redes 1 +▁esperanza 1 +ER 1 +▁Comisario 1 +▁posibilidades 1 +▁importa 1 +▁palestinos 1 +▁profesionales 1 +▁investigaciones 1 +▁registro 1 +11 1 +▁Reunión 1 +▁asociación 1 +▁actuales 1 +▁mitad 1 +▁« 1 +▁propuesto 1 +des 1 +▁crea 1 +▁quincuagésimo 1 +▁párrafos 1 +▁sexo 1 +▁viven 1 +° 1 +▁Lu 1 +▁electrónico 1 +▁ordinario 1 +▁mil 1 +▁comunes 1 +▁inversiones 1 +▁100 1 +ció 1 +▁consenso 1 +▁volver 1 +▁Reglamento 1 +▁continua 1 +▁examina 1 +▁mundiales 1 +▁emp 1 +▁hechos 1 +▁vincula 1 +ante 1 +▁emergencia 1 +▁realización 1 +clu 1 +▁libertades 1 +▁celebrado 1 +CI 1 +▁rápido 1 +▁cabe 1 +▁recientemente 1 +til 1 +▁An 1 +▁Que 1 +▁Salud 1 +im 1 +▁indicadores 1 +▁Servicios 1 +▁estudiantes 1 +mu 1 +▁garantiza 1 +▁interesante 1 +Por 1 +▁resto 1 +▁activa 1 +▁conclusión 1 +torio 1 +▁pequeño 1 +▁rurales 1 +▁transparencia 1 +iva 1 +▁revisión 1 +▁logro 1 +▁Fa 1 +▁adicional 1 +▁ci 1 +és 1 +▁ponente 1 +▁Estas 1 +12 1 +▁mañana 1 +mit 1 +▁enmienda 1 +▁necesitan 1 +.9 1 +▁clara 1 +lan 1 +▁celebra 1 +▁legal 1 +▁ta 1 +▁realizados 1 +▁contratación 1 +▁Ga 1 +▁institucional 1 +▁períodos 1 +▁haga 1 +▁beneplácito 1 +▁grande 1 +▁responsable 1 +▁racial 1 +▁propios 1 +▁1995 1 +gos 1 +▁minutos 1 +▁Palestina 1 +via 1 +▁europeos 1 +qu 1 +▁implica 1 +▁preparado 1 +▁enviar 1 +sis 1 +▁hincapié 1 +AR 1 +ice 1 +▁comenzar 1 +▁extranjeros 1 +▁familiar 1 +▁especializados 1 +dia 1 +▁sol 1 +▁requisitos 1 +▁España 1 +▁normal 1 +▁mes 1 +▁orientación 1 +▁firme 1 +▁Africana 1 +▁Irán 1 +ju 1 +ieran 1 +▁2008, 1 +▁ciudades 1 +▁To 1 +40 1 +bri 1 +▁inicial 1 +▁Ejecutivo 1 +▁media 1 +▁ru 1 +met 1 +▁ambos 1 +▁definición 1 +▁llama 1 +▁presta 1 +▁multi 1 +▁disponibles 1 +X 1 +25 1 +amiento 1 +gui 1 +▁Organismo 1 +▁celebró 1 +▁bi 1 +▁Pre 1 +▁sur 1 +▁vulnerables 1 +▁etc 1 +toria 1 +▁factores 1 +▁Mesa 1 +▁constituyen 1 +ry 1 +▁consumo 1 +% 1 +ko 1 +▁queremos 1 +▁viaje 1 +▁simple 1 +idos 1 +▁respuestas 1 +▁contiene 1 +▁versión 1 +▁encuentran 1 +▁imagen 1 +yo 1 +ito 1 +eo 1 +▁minorías 1 +▁qui 1 +▁Porque 1 +▁capítulo 1 +ES 1 +▁contrario 1 +El 1 +▁Hace 1 +▁petición 1 +▁afectados 1 +▁aquellos 1 +▁conocer 1 +▁interior 1 +▁actuar 1 +?" 
1 +aba 1 +▁voy 1 +▁fase 1 +▁adelante 1 +▁asociaciones 1 +▁allá 1 +▁Caribe 1 +▁Dis 1 +▁llamado 1 +▁desempeñar 1 +cul 1 +▁conjunta 1 +▁Leona 1 +gado 1 +nce 1 +▁UNCTAD 1 +▁construir 1 +▁Bu 1 +▁empezar 1 +▁tarde 1 +▁siga 1 +▁difusión 1 +▁planeta 1 +▁Líbano 1 +▁comisión 1 +▁únicamente 1 +▁v 1 +▁contrato 1 +30 1 +▁Facultativo 1 +▁campo 1 +▁podrán 1 +▁etapa 1 +▁Comercio 1 +▁diversidad 1 +▁noche 1 +▁página 1 +▁solamente 1 +▁prohibición 1 +J 1 +ger 1 +▁Examen 1 +▁constante 1 +▁potencial 1 +▁intención 1 +▁garantías 1 +vis 1 +▁cal 1 +▁encargado 1 +▁efectivo 1 +▁pedir 1 +▁Tenemos 1 +▁plano 1 +tina 1 +▁quiere 1 +amente 1 +▁tri 1 +una 1 +▁europeo 1 +▁deberán 1 +▁reservas 1 +▁arte 1 +▁Sierra 1 +▁calle 1 +▁breve 1 +▁masa 1 +▁informó 1 +▁competentes 1 +▁celebrará 1 +ivo 1 +▁madre 1 +▁título 1 +▁propósito 1 +▁Educación 1 +▁Ejecutiva 1 +▁expresión 1 +▁sabemos 1 +AN 1 +UN 1 +▁enorme 1 +▁tenían 1 +▁1° 1 +▁conferencia 1 +▁recordar 1 +▁entrega 1 +▁supone 1 +▁Exteriores 1 +▁interno 1 +▁estudiar 1 +▁abierto 1 +▁ONG 1 +▁Pacífico 1 +▁trabajando 1 +▁Fi 1 +▁Australia 1 +▁denuncia 1 +13 1 +▁destaca 1 +▁velar 1 +dora 1 +▁Relaciones 1 +▁cuotas 1 +▁familiares 1 +▁garantía 1 +▁1990 1 +▁Árabe 1 +ere 1 +▁Go 1 +▁pie 1 +▁celebración 1 +▁Quiero 1 +rme 1 +▁estratégico 1 +▁técnicas 1 +mer 1 +▁conciencia 1 +op 1 +▁próxima 1 +▁III 1 +▁licencia 1 +▁ámbitos 1 +▁Italia 1 +▁aprobada 1 +▁minas 1 +▁bueno 1 +▁ampliar 1 +▁basada 1 +▁afecta 1 +▁pequeña 1 +▁confirma 1 +rí 1 +▁vamos 1 +▁... 1 +▁isla 1 +▁debates 1 +▁actualidad 1 +ura 1 +▁religión 1 +▁Ne 1 +anza 1 +▁corresponde 1 +val 1 +▁Ese 1 +▁matrimonio 1 +▁solicitudes 1 +▁Hi 1 +▁h 1 +cta 1 +▁Después 1 +▁requiere 1 +tada 1 +▁jurídicos 1 +rea 1 +.7 1 +▁Occidental 1 +▁proporciona 1 +▁continuar 1 +▁recuperación 1 +▁daños 1 +▁cita 1 +▁formulación 1 +▁juego 1 +▁expresar 1 +-20 1 +▁hubiera 1 +▁bienestar 1 +▁precio 1 +▁semanas 1 +▁contacto 1 +ez 1 +▁luchar 1 +nza 1 +▁Ni 1 +▁aprovechar 1 +▁llegado 1 +▁sala 1 +▁necesitamos 1 +▁conducto 1 +▁trato 1 +▁Do 1 +▁Política 1 +▁busca 1 +▁Subcomisión 1 +duc 1 +▁nadie 1 +▁dio 1 +▁pérdida 1 +▁Timor 1 +id 1 +tí 1 +▁poblaciones 1 +▁Guinea 1 +▁natural 1 +▁somos 1 +▁esfuerzo 1 +▁clientes 1 +▁Anexo 1 +▁impacto 1 +▁habitaciones 1 +▁vivienda 1 +▁prisión 1 +▁aproximadamente 1 +▁plantea 1 +▁Asociación 1 +▁creciente 1 +▁examinado 1 +▁estrecha 1 +em 1 +ist 1 +▁evolución 1 +▁justo 1 +▁Algunos 1 +▁código 1 +▁clase 1 +▁ocupación 1 +▁intervención 1 +▁humanidad 1 +▁habrá 1 +▁Bosnia 1 +▁Todo 1 +▁habla 1 +▁ampliación 1 +14 1 +▁Inter 1 +▁avances 1 +▁impedir 1 +▁sexagésimo 1 +▁pone 1 +nar 1 +▁Ve 1 +▁cap 1 +MI 1 +▁Uno 1 +mor 1 +flu 1 +▁mental 1 +▁evidente 1 +▁diferencias 1 +▁educa 1 +▁adhesión 1 +▁tercera 1 +tel 1 +▁"¿ 1 +▁Dependencia 1 +▁fuente 1 +antes 1 +▁Pakistán 1 +ula 1 +» 1 +▁honor 1 +▁sabe 1 +▁frecuencia 1 +▁economías 1 +▁útil 1 +ld 1 +▁Son 1 +cina 1 +▁0 1 +▁movimiento 1 +gó 1 +▁suficientes 1 +▁recurso 1 +▁rec 1 +▁fra 1 +▁examinó 1 +▁visión 1 +lin 1 +▁Pública 1 +▁utiliza 1 +▁fronteras 1 +▁aspecto 1 +▁identidad 1 +▁logrado 1 +▁tierras 1 +▁acto 1 +▁hicieron 1 +.6 1 +▁nacionalidad 1 +▁idiomas 1 +▁Nos 1 +▁aire 1 +▁interesadas 1 +▁ref 1 +rma 1 +▁acaba 1 +▁Cada 1 +dic 1 +▁rápidamente 1 +▁Universidad 1 +▁facilita 1 +▁mesa 1 +▁Je 1 +▁incluyen 1 +▁estadísticas 1 +▁Está 1 +dida 1 +▁aceptar 1 +▁corrupción 1 +▁judiciales 1 +▁partida 1 +▁oficiosas 1 +rico 1 +ea 1 +▁exterior 1 +▁Sudáfrica 1 +tico 1 +▁orador 1 +▁2010 1 +▁Tiene 1 +▁come 1 +▁compra 1 +▁médico 1 +▁Hemos 1 +izada 1 +▁elección 1 +▁destino 1 +▁enfermedad 1 +▁probablemente 1 +▁autores 1 +▁periódico 1 +▁1994 1 +▁opciones 1 +rgi 1 
+▁comportamiento 1 +▁acontecimientos 1 +▁Man 1 +▁excelente 1 +cl 1 +▁Pi 1 +▁agricultura 1 +▁inf 1 +▁siete 1 +▁cambia 1 +18 1 +▁inclusión 1 +▁representación 1 +▁soy 1 +tt 1 +▁duración 1 +▁ju 1 +▁obstáculos 1 +▁Aquí 1 +▁Tras 1 +▁l 1 +▁opera 1 +▁mensaje 1 +tarios 1 +▁cursos 1 +▁ilícito 1 +stra 1 +▁elemento 1 +▁Estamos 1 +▁eficiencia 1 +bre 1 +aje 1 +* 1 +▁indemnización 1 +— 1 +▁animales 1 +▁adecuado 1 +▁recientes 1 +▁Herzegovina 1 +▁bar 1 +▁sé 1 +▁Desarme 1 +▁Bajos 1 +▁op 1 +▁Sociales 1 +▁vidas 1 +▁dije 1 +▁Wa 1 +▁plenaria 1 +▁adelantados 1 +▁consolidación 1 +▁directo 1 +▁reformas 1 +▁Ki 1 +▁israelíes 1 +ica 1 +ly 1 +port 1 +▁extranjero 1 +▁acompaña 1 +▁ajuste 1 +▁ataques 1 +▁trabaja 1 +▁privada 1 +▁pronto 1 +60 1 +rlo 1 +▁dirigentes 1 +ible 1 +▁mostrar 1 +ino 1 +endo 1 +▁gusta 1 +▁reitera 1 +ab 1 +IT 1 +▁hogar 1 +▁crédito 1 +▁seguirá 1 +▁Sede 1 +▁búsqueda 1 +▁decenio 1 +▁campaña 1 +▁basado 1 +▁Comp 1 +▁categorías 1 +▁llamada 1 +▁Popular 1 +iente 1 +▁dan 1 +siona 1 +▁activamente 1 +▁comprendido 1 +▁ONU 1 +ite 1 +16 1 +▁observó 1 +▁Fue 1 +▁considerable 1 +▁Egipto 1 +▁equipos 1 +▁constitucional 1 +▁occidental 1 +▁increíble 1 +ificación 1 +liza 1 +▁Viena 1 +▁tradicionales 1 +▁Protección 1 +▁termina 1 +▁jurisdicción 1 +▁específicas 1 +▁Car 1 +AD 1 +▁super 1 +▁Suiza 1 +día 1 +van 1 +▁regular 1 +▁rep 1 +▁eficiente 1 +▁Uds 1 +▁Latina 1 +▁Cu 1 +▁candidatos 1 +▁directiva 1 +jan 1 +oso 1 +▁Islámica 1 +▁organizar 1 +▁primeros 1 +pla 1 +▁reconocer 1 +▁pagar 1 +▁explica 1 +( 1 +▁Colombia 1 +▁conducta 1 +▁vivir 1 +▁teléfono 1 +vert 1 +▁participa 1 +▁completamente 1 +▁denuncias 1 +De 1 +ento 1 +cial 1 +▁supra 1 +▁oferta 1 +▁profunda 1 +sion 1 +▁rehabilitación 1 +▁centra 1 +▁emisiones 1 +▁suministro 1 +▁The 1 +▁Insta 1 +▁primaria 1 +▁sucede 1 +▁infancia 1 +tiendo 1 +▁doble 1 +▁Nota 1 +▁chi 1 +▁k 1 +▁ilegal 1 +▁Somalia 1 +▁Yugoslavia 1 +▁otorga 1 +▁Kuwait 1 +17 1 +▁laboral 1 +▁1999, 1 +▁producir 1 +▁experiencias 1 +▁Gestión 1 +▁formuladas 1 +cimiento 1 +▁asilo 1 +▁san 1 +▁Gu 1 +▁ciclo 1 +▁señora 1 +▁atender 1 +MA 1 +▁oriental 1 +fund 1 +▁dichos 1 +CE 1 +▁hacemos 1 +▁establecidos 1 +▁realiza 1 +▁migrantes 1 +▁razonable 1 +▁español 1 +ide 1 +▁So 1 +▁reglas 1 +ión 1 +ñ 1 +▁hablando 1 +én 1 +01 1 +▁amigos 1 +▁cuidado 1 +▁tomó 1 +▁composición 1 +▁expresó 1 +▁asigna 1 +▁compartir 1 +▁trabajan 1 +gel 1 +▁tendría 1 +▁hijo 1 +▁enfrenta 1 +/19 1 +▁estructuras 1 +▁deliberaciones 1 +▁combina 1 +▁acta 1 +▁positiva 1 +cip 1 +▁rápida 1 +mbre 1 +▁Presupuesto 1 +▁porcentaje 1 +we 1 +▁comienzo 1 +▁Expertos 1 +▁concluir 1 +▁usuarios 1 +▁consigna 1 +▁levanta 1 +pri 1 +▁disponible 1 +▁entra 1 +▁tomado 1 +▁Noruega 1 +▁sistemática 1 +▁Sección 1 +▁médicos 1 +cor 1 +▁Señorías 1 +ev 1 +▁demostrado 1 +nder 1 +▁imágenes 1 +▁X 1 +▁Indonesia 1 +▁Hoy 1 +▁propias 1 +▁comunitario 1 +▁Tierra 1 +▁multilaterales 1 +▁desplazados 1 +ificado 1 +▁integrada 1 +▁fiscal 1 +DE 1 +▁iniciar 1 +▁técnico 1 +▁hagan 1 +▁Estatuto 1 +jas 1 +▁alimentaria 1 +▁Ucrania 1 +table 1 +▁jurídicas 1 +gubernamentales 1 +▁costa 1 +▁desafíos 1 +▁desastres 1 +ones 1 +▁Central 1 +▁previstos 1 +▁ACNUR 1 +▁pertinente 1 +ion 1 +▁lado 1 +▁pocos 1 +▁ambas 1 +ua 1 +▁radio 1 +▁Señora 1 +▁incorporar 1 +▁Ri 1 +▁término 1 +▁fenómeno 1 +▁Rwanda 1 +▁Argentina 1 +▁mandatos 1 +▁estratégica 1 +▁integridad 1 +aria 1 +▁especie 1 +▁decidir 1 +bar 1 +▁temporal 1 +▁sentencia 1 +▁favorable 1 +chi 1 +▁escolar 1 +▁w 1 +▁ocho 1 +▁35 1 +vie 1 +▁clima 1 +▁euros 1 +▁im 1 +IS 1 +▁distrito 1 +▁libro 1 +▁Bien 1 +▁instancia 1 +▁Económica 1 +▁recomendó 1 +▁Estrategia 1 +lia 1 +▁compara 1 +▁servir 1 
+▁presentes 1 +▁intento 1 +▁Ello 1 +EN 1 +▁publicación 1 +ano 1 +▁industrial 1 +▁Aplicación 1 +▁Oriental 1 +▁médica 1 +dera 1 +▁científicos 1 +▁adolescentes 1 +▁Suecia 1 +▁estatuto 1 +▁técnicos 1 +▁dirigido 1 +icio 1 +▁tamaño 1 +▁alienta 1 +▁estipula 1 +▁petróleo 1 +▁cifras 1 +▁reafirma 1 +▁empleados 1 +▁contribuye 1 +▁africanos 1 +Se 1 +▁desarrollados 1 +▁logros 1 +▁remo 1 +▁presidente 1 +▁Económicos 1 +▁Embajador 1 +▁pedido 1 +▁científica 1 +▁♫ 1 +vers 1 +▁% 1 +▁norte 1 +▁legislativo 1 +▁Tal 1 +▁amenazas 1 +▁dependencia 1 +▁moral 1 +▁viola 1 +▁residencia 1 +▁tasas 1 +ner 1 +▁electrónica 1 +▁repercusiones 1 +mbra 1 +▁fuego 1 +/20 1 +▁opción 1 +▁tratos 1 +▁consideración 1 +▁buscar 1 +▁características 1 +▁racismo 1 +▁prensa 1 +▁padre 1 +▁Marco 1 +▁sociedades 1 +▁Z 1 +99 1 +IC 1 +▁restricciones 1 +▁consiste 1 +▁interpretación 1 +▁Documentos 1 +resolución 1 +▁000 1 +▁sola 1 +ts 1 +▁respetar 1 +▁espíritu 1 +▁sustantivo 1 +▁45 1 +En 1 +▁voto 1 +▁dónde 1 +▁entrar 1 +▁Delito 1 +ores 1 +sal 1 +▁Va 1 +▁normativa 1 +▁participan 1 +mé 1 +▁obra 1 +▁organizada 1 +▁Pu 1 +▁Nuestro 1 +▁institución 1 +▁ejercer 1 +▁crímenes 1 +tima 1 +▁entendimiento 1 +▁terceros 1 +▁capacidades 1 +▁aplicable 1 +▁organizado 1 +▁norma 1 +▁superar 1 +CP 1 +eros 1 +▁(1 1 +▁Hu 1 +AS 1 +▁Nigeria 1 +▁Serbia 1 +▁exposición 1 +ral 1 +▁reclamaciones 1 +tergubernamental 1 +zar 1 +▁demuestra 1 +▁francés 1 +gua 1 +▁comisiones 1 +hu 1 +97 1 +▁ambientales 1 +▁rechaza 1 +▁inferior 1 +▁intenta 1 +lega 1 +▁Liberia 1 +▁Ge 1 +▁Islas 1 +Es 1 +est 1 +▁cree 1 +▁transmit 1 +▁individual 1 +ay 1 +▁democrático 1 +▁limitado 1 +▁Muchas 1 +▁idioma 1 +tero 1 +▁Burundi 1 +▁destinados 1 +▁ciertas 1 +▁Jo 1 +▁dichas 1 +▁carrera 1 +▁pudiera 1 +▁subraya 1 +▁elegir 1 +RI 1 +pul 1 +▁áreas 1 +▁manos 1 +▁Estoy 1 +▁larga 1 +▁contratos 1 +▁comunitaria 1 +▁Ambiente 1 +▁operacionales 1 +▁llevado 1 +▁Puede 1 +ico 1 +▁desarrolla 1 +mica 1 +ff 1 +▁delincuencia 1 +▁ambiental 1 +▁podían 1 +▁foto 1 +ología 1 +▁habitantes 1 +▁Siria 1 +OM 1 +▁debidamente 1 +▁Decide 1 +▁Primer 1 +illo 1 +▁área 1 +▁democrática 1 +▁hacía 1 +▁em 1 +▁Coordinación 1 +ena 1 +bla 1 +▁equilibrio 1 +▁Bi 1 +▁aborda 1 +▁preocupaciones 1 +▁iba 1 +stro 1 +▁ratificación 1 +03 1 +ent 1 +▁colegas 1 +▁colectiva 1 +.1) 1 +▁similares 1 +▁Cámara 1 +▁solidaridad 1 +▁elaborado 1 +udi 1 +▁éstos 1 +▁inmediatamente 1 +zi 1 +▁fo 1 +▁Otros 1 +▁Pen 1 +▁mismas 1 +▁Ad 1 +▁árabe 1 +▁complejo 1 +▁Chile 1 +▁recuerda 1 +▁provoca 1 +▁48 1 +▁comida 1 +▁vemos 1 +▁activos 1 +▁quería 1 +▁aprender 1 +▁fija 1 +au 1 +/4 1 +/58/ 1 +▁consentimiento 1 +▁digital 1 +▁oradores 1 +▁puesta 1 +▁Cuadro 1 +▁quizá 1 +▁esenciales 1 +▁detallada 1 +▁ocasión 1 +fri 1 +▁índole 1 +▁alentar 1 +▁determinados 1 +AL 1 +▁quién 1 +▁comparación 1 +▁Chipre 1 +▁indicado 1 +▁generación 1 +▁Prevención 1 +▁reales 1 +▁sede 1 +▁aéreo 1 +cita 1 +▁continúa 1 +▁1993 1 +▁regula 1 +▁quieren 1 +45 1 +▁podido 1 +▁Be 1 +▁estados 1 +▁color 1 +ial 1 +▁bilaterales 1 +96 1 +▁CA 1 +▁col 1 +izados 1 +lé 1 +/55/ 1 +bro 1 +ín 1 +▁concretos 1 +▁producido 1 +▁proceda 1 +▁original 1 +form 1 +▁componente 1 +▁federal 1 +▁música 1 +tores 1 +▁mira 1 +▁t 1 +▁seres 1 +▁procesa 1 +▁sustancias 1 +▁diez 1 +▁beneficio 1 +▁viene 1 +▁cri 1 +serv 1 +▁32 1 +▁meta 1 +cional 1 +▁intelectual 1 +▁inmediato 1 +▁ocasiones 1 +cien 1 +▁presión 1 +▁posterior 1 +▁2005. 
1 +ler 1 +scripción 1 +mbi 1 +ord 1 +▁Debe 1 +▁For 1 +▁buenas 1 +▁exhorta 1 +▁solicitar 1 +▁externa 1 +▁componentes 1 +cciones 1 +ien 1 +duzca 1 +ation 1 +▁seguido 1 +▁34 1 +ndido 1 +mática 1 +▁Marruecos 1 +▁operación 1 +▁37 1 +▁tendencia 1 +▁vecinos 1 +▁Federal 1 +illa 1 +▁computadora 1 +▁definitiva 1 +fre 1 +▁protocolo 1 +▁54 1 +▁Personal 1 +▁demora 1 +uda 1 +▁Apoyo 1 +▁tampoco 1 +eg 1 +▁Per 1 +▁separado 1 +▁cáncer 1 +▁ilícita 1 +▁consumidores 1 +UR 1 +▁Ru 1 +▁esperar 1 +▁et 1 +▁anteriormente 1 +▁cara 1 +/3 1 +▁vía 1 +▁Costa 1 +▁acordado 1 +▁Ministros 1 +▁encaminadas 1 +ego 1 +▁partidos 1 +▁explicar 1 +ow 1 +▁prestando 1 +▁numerosas 1 +▁tuviera 1 +▁segura 1 +▁hogares 1 +▁unidad 1 +▁permita 1 +▁numerosos 1 +▁básica 1 +▁pas 1 +▁crítica 1 +/59/ 1 +iéndose 1 +pos 1 +▁Regional 1 +▁significativa 1 +▁administrativas 1 +dí 1 +▁Más 1 +ual 1 +▁halla 1 +liber 1 +mite 1 +▁Era 1 +▁edificio 1 +▁Darfur 1 +▁Cuestiones 1 +▁nombramiento 1 +▁sentir 1 +▁corto 1 +▁debían 1 +▁Les 1 +ndi 1 +▁salir 1 +NU 1 +▁bosques 1 +▁realizada 1 +▁despliegue 1 +▁Web 1 +35 1 +▁puerta 1 +▁empresarial 1 +▁encargada 1 +▁brinda 1 +▁Gaza 1 +▁ten 1 +▁reconstrucción 1 +▁Haití 1 +▁quinto 1 +▁Sistema 1 +▁estableció 1 +ji 1 +▁observadores 1 +▁Ko 1 +▁calcula 1 +▁terminar 1 +▁rural 1 +▁mortalidad 1 +▁límites 1 +▁Des 1 +▁Bretaña 1 +▁incluya 1 +▁película 1 +▁excepción 1 +▁Human 1 +▁secundaria 1 +▁próximos 1 +▁cámara 1 +▁dignidad 1 +/5 1 +ier 1 +rlos 1 +cido 1 +rd 1 +cept 1 +▁gas 1 +rte 1 +▁genera 1 +▁multilateral 1 +▁presentada 1 +▁Austria 1 +▁fund 1 +▁cabeza 1 +▁detenidos 1 +▁ordenación 1 +▁33 1 +▁recon 1 +arse 1 +▁auditoría 1 +▁representan 1 +▁avanzar 1 +Ivoire 1 +▁Discriminación 1 +▁hará 1 +▁entidad 1 +▁analizar 1 +▁destacar 1 +▁adecuadas 1 +▁Autoridad 1 +▁arma 1 +▁conexas 1 +▁identificar 1 +▁ii 1 +▁video 1 +▁afectan 1 +▁abs 1 +orient 1 +mó 1 +▁1992 1 +▁distancia 1 +lec 1 +▁LA 1 +▁justa 1 +▁paga 1 +▁entraña 1 +gna 1 +▁Operaciones 1 +▁determinado 1 +len 1 +▁guarda 1 +▁abuso 1 +▁Tenien 1 +▁Fu 1 +▁especifica 1 +▁televisión 1 +26 1 +▁tendrán 1 +▁comunicar 1 +▁ponga 1 +▁Dios 1 +▁documentación 1 +icia 1 +▁decidido 1 +▁elevado 1 +▁publica 1 +▁positivo 1 +▁utilizando 1 +▁alcanzado 1 +▁Asesor 1 +▁or 1 +▁36 1 +78 1 +▁circulación 1 +▁gu 1 +Re 1 +▁Ac 1 +eron 1 +tario 1 +▁habido 1 +▁provincia 1 +▁conjuntamente 1 +▁hacerse 1 +▁combustible 1 +▁superficie 1 +▁vehículos 1 +▁Políticos 1 +▁financiar 1 +23 1 +▁soberanía 1 +/60/ 1 +▁demostrar 1 +▁copia 1 +▁colabora 1 +▁específicos 1 +▁organiza 1 +esta 1 +▁mínima 1 +95 1 +▁Zelandia 1 +▁potencia 1 +▁aparece 1 +▁volumen 1 +▁continente 1 +▁registrado 1 +▁indicar 1 +▁Croacia 1 +▁privadas 1 +▁Ti 1 +▁reto 1 +▁capa 1 +lex 1 +▁IV 1 +/63/ 1 +▁estable 1 +▁escuchar 1 +▁situado 1 +▁Primera 1 +▁ejemplos 1 +▁modificar 1 +▁integrar 1 +▁usa 1 +▁Hasta 1 +84 1 +aron 1 +▁correo 1 +▁Côte 1 +▁Wi 1 +▁Oficiales 1 +▁Quinta 1 +▁comprende 1 +▁controlar 1 +▁39 1 +▁cualquiera 1 +▁calendario 1 +▁método 1 +▁destinadas 1 +66 1 +▁cultivo 1 +▁ocurre 1 +▁dura 1 +▁apertura 1 +▁Tengo 1 +▁promulga 1 +▁étnico 1 +/57/ 1 +sol 1 +rie 1 +▁exactamente 1 +21 1 +▁gobernanza 1 +▁juntos 1 +▁víctima 1 +▁similar 1 +▁aumentado 1 +▁salva 1 +▁territorial 1 +▁ocupado 1 +vin 1 +67 1 +nio 1 +lica 1 +gio 1 +jó 1 +▁Polonia 1 +▁naciones 1 +wi 1 +▁gana 1 +▁gen 1 +ON 1 +cra 1 +▁respeta 1 +▁exportación 1 +▁aprendizaje 1 +▁Sólo 1 +▁procede 1 +▁darle 1 +doras 1 +▁1991 1 +▁concreto 1 +▁indispensable 1 +▁precisa 1 +▁siquiera 1 +rup 1 +▁observar 1 +fra 1 +▁estatales 1 +▁Georgia 1 +▁cumbre 1 +▁Sí 1 +ticos 1 +▁pacientes 1 +▁conocido 1 +▁conversaciones 1 +▁legislativas 1 
+/56/ 1 +▁Pres 1 +▁encarga 1 +▁diputados 1 +▁Adjunto 1 +▁integral 1 +▁duradera 1 +▁créditos 1 +ID 1 +Con 1 +200 1 +▁células 1 +▁cometido 1 +▁probable 1 +▁Uganda 1 +tras 1 +▁sometido 1 +▁agrícola 1 +▁tradicional 1 +▁Segunda 1 +▁futuras 1 +▁ejecutar 1 +▁comprometido 1 +▁transparente 1 +▁Todas 1 +▁2004. 1 +▁corazón 1 +▁proporción 1 +▁encuesta 1 +▁prestado 1 +▁2006. 1 +▁institucionales 1 +▁limitar 1 +▁introducción 1 +▁permiso 1 +▁beneficia 1 +▁racional 1 +▁celebradas 1 +rri 1 +▁sexuales 1 +▁ciertos 1 +ot 1 +▁International 1 +dero 1 +▁posee 1 +▁foro 1 +▁penales 1 +lina 1 +76 1 +▁Grecia 1 +▁Nuestra 1 +▁desechos 1 +▁comité 1 +▁paquete 1 +▁Pri 1 +Z 1 +▁ga 1 +▁Perú 1 +▁mencionado 1 +▁factor 1 +▁adopten 1 +▁mas 1 +▁1, 1 +▁Policía 1 +▁Directiva 1 +▁selecciona 1 +▁comprensión 1 +▁Jefe 1 +int 1 +▁Congreso 1 +▁Corr 1 +▁presentadas 1 +▁introducir 1 +taria 1 +▁anuales 1 +69 1 +presión 1 +▁Quisiera 1 +▁pendientes 1 +▁permanentes 1 +▁detalles 1 +▁conservación 1 +▁líderes 1 +▁Fiscal 1 +▁adopte 1 +▁limita 1 +IN 1 +▁formula 1 +gas 1 +▁físico 1 +▁amor 1 +▁Cre 1 +▁oportuna 1 +▁prestaciones 1 +▁tratando 1 +▁realizadas 1 +▁bio 1 +▁presidencia 1 +▁popular 1 +▁cabal 1 +▁mejoras 1 +▁Debemos 1 +▁Etiopía 1 +▁lu 1 +▁Guatemala 1 +▁evaluaciones 1 +74 1 +▁igualmente 1 +▁desempeño 1 +▁deberíamos 1 +od 1 +▁observación 1 +▁educativo 1 +▁coherente 1 +▁pronuncia 1 +▁ofrecen 1 +▁negociación 1 +ático 1 +este 1 +▁previstas 1 +▁sabía 1 +▁Add 1 +TE 1 +▁junta 1 +/62/ 1 +▁Eliminación 1 +▁perspectivas 1 +24 1 +▁goza 1 +▁finalmente 1 +▁migración 1 +osa 1 +▁legales 1 +▁Algunas 1 +▁restaurante 1 +▁comentarios 1 +▁aquel 1 +▁permitirá 1 +▁daño 1 +▁mala 1 +▁pérdidas 1 +▁asumir 1 +▁Tu 1 +▁coordinar 1 +cua 1 +▁agradecimiento 1 +arios 1 +▁libros 1 +29 1 +19 1 +continuación 1 +▁toca 1 +▁múltiples 1 +▁obtenido 1 +▁utilizado 1 +▁' 1 +lidad 1 +media 1 +mini 1 +▁pacífica 1 +▁organismo 1 +gente 1 +▁Asistencia 1 +▁preventiva 1 +▁estará 1 +▁publicado 1 +▁proceder 1 +▁adecuados 1 +▁encargados 1 +/61/ 1 +▁determinadas 1 +▁arriba 1 +▁alguno 1 +▁prima 1 +▁disminución 1 +voc 1 +dro 1 +33 1 +▁hubo 1 +tiza 1 +51 1 +▁determina 1 +▁Portugal 1 +▁Muchos 1 +▁42 1 +▁mata 1 +7% 1 +▁publicar 1 +enda 1 +ris 1 +▁abogado 1 +▁retos 1 +▁muerto 1 +▁Del 1 +▁tendencias 1 +▁someter 1 +▁metros 1 +▁Can 1 +22 1 +iga 1 +ag 1 +▁49 1 +▁2009, 1 +▁resumen 1 +▁efectivos 1 +ire 1 +▁ejército 1 +▁erradicación 1 +▁voz 1 +mpli 1 +▁post 1 +▁mencionar 1 +▁limitaciones 1 +▁vital 1 +▁geográfica 1 +▁diseñado 1 +▁2003. 1 +▁capaz 1 +▁modalidades 1 +▁buenos 1 +▁Suplemento 1 +▁on 1 +▁herramientas 1 +▁abusos 1 +83 1 +▁procedentes 1 +▁2007. 
1 +▁aprobadas 1 +▁Territorio 1 +.8 1 +▁Roma 1 +▁encanta 1 +▁Sus 1 +▁asignación 1 +▁vivo 1 +mon 1 +▁afectadas 1 +▁Sostenible 1 +▁inmediata 1 +▁constituir 1 +▁Bar 1 +▁/ 1 +▁ge 1 +ust 1 +▁cargos 1 +▁comprender 1 +▁exportaciones 1 +▁israelí 1 +▁selección 1 +▁58 1 +▁eje 1 +07 1 +▁44 1 +▁estilo 1 +▁77 1 +▁pág 1 +▁influencia 1 +▁innovación 1 +tención 1 +▁desempleo 1 +lá 1 +mpa 1 +▁perjuicio 1 +▁Medidas 1 +▁adultos 1 +▁aplicaciones 1 +▁57 1 +▁* 1 +▁Argelia 1 +▁laboratorio 1 +rado 1 +▁relieve 1 +▁parecer 1 +▁Finlandia 1 +▁máxima 1 +artículo 1 +▁negocios 1 +▁Fo 1 +curri 1 +▁reglamenta 1 +▁é 1 +▁siente 1 +80 1 +▁evento 1 +▁particulares 1 +▁contingentes 1 +▁dé 1 +▁aumenta 1 +▁indican 1 +▁justifica 1 +del 1 +▁euro 1 +eña 1 +▁espero 1 +▁difíciles 1 +▁mente 1 +mbo 1 +▁criterio 1 +▁alumnos 1 +▁disponibilidad 1 +▁llegó 1 +nica 1 +▁Val 1 +▁reclamación 1 +▁metas 1 +fica 1 +▁Sta 1 +bia 1 +▁efectivamente 1 +▁bases 1 +▁promedio 1 +▁cadena 1 +79 1 +▁bancos 1 +▁Dirección 1 +▁inmigración 1 +▁disciplina 1 +iz 1 +▁visitas 1 +▁Resolución 1 +▁suministra 1 +▁anuncia 1 +▁investigar 1 +▁Checa 1 +▁periódicos 1 +▁fre 1 +▁debida 1 +▁Fuerza 1 +▁considerarse 1 +▁declara 1 +▁55 1 +▁disfrutar 1 +▁SIDA 1 +fin 1 +cómo 1 +▁coste 1 +▁salario 1 +▁administrativos 1 +▁géneros 1 +▁suficientemente 1 +rig 1 +▁desempeña 1 +▁merece 1 +.30 1 +▁verdadera 1 +▁informado 1 +▁individuales 1 +▁carretera 1 +▁incremento 1 +▁Mientras 1 +▁revela 1 +▁41 1 +▁reconciliación 1 +▁70 1 +▁vínculos 1 +▁salida 1 +6% 1 +tá 1 +▁n 1 +▁80 1 +▁Bélgica 1 +▁examinando 1 +▁puso 1 +▁plataforma 1 +▁respectivamente 1 +uta 1 +plo 1 +mático 1 +▁Belarús 1 +como 1 +▁Relatora 1 +▁aplicables 1 +▁época 1 +pec 1 +TA 1 +▁Ibíd 1 +▁trate 1 +▁Vicepresidente 1 +▁mapa 1 +▁fiscales 1 +bol 1 +▁contempla 1 +plica 1 +▁cam 1 +▁pudieran 1 +produc 1 +▁firmado 1 +▁testigos 1 +República 1 +▁Interior 1 +ibilidad 1 +▁satisfacer 1 +cord 1 +▁cometidos 1 +▁letra 1 +70 1 +▁concesión 1 +▁permitan 1 +75 1 +▁aporta 1 +side 1 +▁adquisición 1 +hor 1 +▁ideal 1 +ster 1 +▁señalado 1 +▁miedo 1 +▁alimentación 1 +▁incorporación 1 +ibles 1 +▁variedad 1 +▁mantiene 1 +▁transmitir 1 +▁tránsito 1 +▁Tra 1 +▁revisar 1 +▁vistas 1 +▁medicamentos 1 +zó 1 +▁suelo 1 +▁mercancías 1 +▁ojos 1 +▁magistrados 1 +▁finalidad 1 +▁rea 1 +▁extrema 1 +▁funcionario 1 +rm 1 +2% 1 +▁imposible 1 +▁intervenciones 1 +▁dirigidas 1 +▁seguros 1 +▁permiten 1 +▁lengua 1 +▁insuficiente 1 +▁90 1 +▁inicio 1 +br 1 +▁2001. 1 +▁independientes 1 +▁PNUMA 1 +▁vulnerabilidad 1 +▁joven 1 +icos 1 +ora 1 +-1 1 +ant 1 +▁Cha 1 +▁lejos 1 +▁2002. 1 +▁asignado 1 +▁especies 1 +▁disfrute 1 +▁llevó 1 +▁Tercera 1 +▁proporcione 1 +▁aguas 1 +▁2000. 1 +1, 1 +▁rein 1 +▁Ab 1 +▁básico 1 +▁presentados 1 +44 1 +▁pagos 1 +▁argumento 1 +▁complace 1 +▁fui 1 +▁inspira 1 +▁señal 1 +▁existente 1 +▁blanco 1 +▁agrícolas 1 +▁sigan 1 +edi 1 +tando 1 +▁sumamente 1 +▁Esos 1 +Rev 1 +RE 1 +vel 1 +filia 1 +27 1 +▁satélite 1 +▁lectura 1 +▁tomando 1 +▁adapta 1 +▁2008. 
1 +▁correcciones 1 +▁frecuente 1 +▁51 1 +73 1 +▁votar 1 +▁www 1 +▁Antes 1 +▁proveedores 1 +▁viajes 1 +▁53 1 +▁acceder 1 +02 1 +▁establezca 1 +▁CE 1 +▁definir 1 +▁prevista 1 +▁puedes 1 +▁dirigir 1 +▁malos 1 +gro 1 +▁ejecuta 1 +tamos 1 +▁46 1 +▁crucial 1 +▁histórico 1 +▁atrás 1 +▁dicen 1 +68 1 +▁experimenta 1 +38 1 +stre 1 +▁Tailandia 1 +▁cocina 1 +▁penas 1 +ting 1 +▁sirve 1 +▁Sol 1 +▁Rica 1 +▁historias 1 +lización 1 +eño 1 +▁limitada 1 +▁dedicado 1 +integr 1 +▁momentos 1 +▁vaya 1 +▁cierta 1 +▁pretende 1 +▁Árabes 1 +SP 1 +▁Malasia 1 +▁habitual 1 +▁aplicado 1 +▁cientos 1 +▁avance 1 +▁Recursos 1 +▁respectivos 1 +▁moneda 1 +▁biológica 1 +31 1 +▁nacimiento 1 +▁químicos 1 +▁(2001) 1 +▁sexto 1 +▁respond 1 +▁entiende 1 +▁soldados 1 +▁Alta 1 +▁espacial 1 +▁apropiado 1 +▁Filipinas 1 +ED 1 +▁espaciales 1 +▁intentar 1 +▁47 1 +▁Ke 1 +▁unidades 1 +3% 1 +▁ulterior 1 +▁consejo 1 +▁cha 1 +▁modelos 1 +▁adoptada 1 +▁consta 1 +▁humanitarias 1 +▁tarjeta 1 +▁pienso 1 +▁Real 1 +▁escucha 1 +81 1 +vu 1 +▁Túnez 1 +▁salvo 1 +▁Dinamarca 1 +▁reintegración 1 +▁software 1 +▁ejecutivo 1 +▁audiencia 1 +▁solicitado 1 +▁guía 1 +▁asegura 1 +▁frontera 1 +▁examine 1 +▁raíz 1 +▁mediano 1 +▁reunir 1 +▁marino 1 +▁verdadero 1 +▁formulada 1 +▁establecidas 1 +▁CO 1 +▁Miembro 1 +▁quizás 1 +▁Ciencia 1 +▁38 1 +leta 1 +▁Refugiados 1 +▁permitido 1 +▁Red 1 +▁imponer 1 +▁tus 1 +cel 1 +▁aprecia 1 +▁convenios 1 +vol 1 +34 1 +▁aeropuerto 1 +▁antigua 1 +▁Acoge 1 +▁pat 1 +▁seminarios 1 +▁seminario 1 +▁presupuestario 1 +▁enlace 1 +▁supervisar 1 +▁ataque 1 +ería 1 +▁involucra 1 +▁párrs 1 +▁200 1 +▁reciben 1 +▁objetos 1 +▁Santa 1 +4% 1 +05 1 +▁puerto 1 +▁Myanmar 1 +parte 1 +▁pasó 1 +▁56 1 +▁considerado 1 +▁entrevista 1 +▁gratuita 1 +▁raza 1 +cé 1 +bili 1 +puesto 1 +▁incorpora 1 +▁izquierda 1 +DI 1 +▁apropiadas 1 +▁acumula 1 +▁hospital 1 +▁52 1 +▁utilizan 1 +▁comenzó 1 +▁equitativa 1 +▁referente 1 +▁capaces 1 +▁notable 1 +rías 1 +▁Bangladesh 1 +▁compleja 1 +▁desafío 1 +▁compañía 1 +posición 1 +▁torno 1 +Á 1 +8% 1 +▁impuestos 1 +▁altos 1 +▁cooperar 1 +▁cifra 1 +▁presentan 1 +▁Potencia 1 +▁éstas 1 +3) 1 +28 1 +48 1 +▁leer 1 +▁1998, 1 +▁desarrollado 1 +▁deriva 1 +▁efectuar 1 +ografía 1 +▁electoral 1 +37 1 +▁indicó 1 +▁convertido 1 +▁generar 1 +damente 1 +▁positivos 1 +▁prostitución 1 +nico 1 +▁Supervisión 1 +forma 1 +▁nueve 1 +ológica 1 +▁manifiesto 1 +▁practica 1 +▁emplea 1 +▁43 1 +fir 1 +▁gama 1 +▁Observa 1 +▁Actualmente 1 +▁impulso 1 +▁superiores 1 +▁Ob 1 +SA 1 +▁requisito 1 +61 1 +87 1 +▁revisado 1 +▁árabes 1 +▁Du 1 +▁km 1 +▁62 1 +ñe 1 +▁estrechamente 1 +▁coherencia 1 +▁ONUDI 1 +▁posteriormente 1 +▁camina 1 +SE 1 +▁voluntarias 1 +▁personales 1 +▁medicina 1 +▁juez 1 +71 1 +▁go 1 +▁activo 1 +▁incluyendo 1 +▁Mantenimiento 1 +▁Rumania 1 +▁Civil 1 +jar 1 +▁micro 1 +▁voluntaria 1 +▁convertir 1 +▁2, 1 +▁encontramos 1 +dé 1 +ima 1 +iste 1 +▁iniciado 1 +▁Ecuador 1 +▁Cabe 1 +4) 1 +guard 1 +▁negocia 1 +▁índice 1 +▁profundo 1 +▁cumplido 1 +▁moderna 1 +▁Aprobación 1 +90 1 +tales 1 +▁ruta 1 +▁contribuyen 1 +▁europeas 1 +▁desplazamiento 1 +▁concentra 1 +▁recibió 1 +▁Azerbaiyán 1 +▁departamentos 1 +▁alimenta 1 +▁Sha 1 +Ha 1 +▁creó 1 +RA 1 +▁autorización 1 +▁condenado 1 +▁concreta 1 +▁mejoramiento 1 +▁noticias 1 +▁rendimiento 1 +ard 1 +▁abrir 1 +▁Bulgaria 1 +▁prolonga 1 +▁decide 1 +▁Promoción 1 +CA 1 +▁promesa 1 +▁distribuir 1 +gia 1 +▁Droga 1 +▁OMC 1 +tener 1 +▁habitación 1 +▁Exhorta 1 +▁meridional 1 +▁Angola 1 +1) 1 +▁estudia 1 +▁liderazgo 1 +▁sensibilización 1 +▁Iniciativa 1 +▁Sobre 1 +▁básicas 1 +Hábitat 1 +lig 1 +▁Venezuela 1 +▁líneas 1 +▁asesinato 1 +▁sueño 1 
+▁vio 1 +94 1 +▁votado 1 +▁compromete 1 +▁Ver 1 +▁significativo 1 +ños 1 +▁cer 1 +▁acepta 1 +▁ingreso 1 +77 1 +▁dolor 1 +PE 1 +▁ligeras 1 +▁Alianza 1 +▁París 1 +▁realice 1 +ador 1 +▁pl 1 +ph 1 +▁sitios 1 +▁metodología 1 +▁urgencia 1 +▁Kenya 1 +pli 1 +▁ocupan 1 +▁Introducción 1 +▁estuvo 1 +ah 1 +▁ratificado 1 +▁administra 1 +▁expone 1 +▁usando 1 +OR 1 +▁Cultura 1 +▁totalidad 1 +▁financia 1 +▁controles 1 +▁reducido 1 +▁suerte 1 +▁residentes 1 +▁sonido 1 +▁explota 1 +▁decía 1 +▁agricultores 1 +▁campañas 1 +sia 1 +▁Civiles 1 +32 1 +▁juzga 1 +▁aun 1 +mov 1 +▁redacción 1 +▁Supremo 1 +▁podríamos 1 +▁Sal 1 +▁planos 1 +▁adaptación 1 +stitución 1 +/2000/ 1 +▁coloca 1 +▁playa 1 +▁aldea 1 +▁Reconociendo 1 +▁edificios 1 +:// 1 +▁ultraterrestre 1 +▁negro 1 +ular 1 +▁tienda 1 +04 1 +▁jueces 1 +▁Expresa 1 +▁globalización 1 +AM 1 +▁Lanka 1 +1% 1 +▁vuelo 1 +▁confi 1 +▁reglamentos 1 +terinstitucional 1 +▁verificación 1 +▁competencias 1 +▁hambre 1 +CO 1 +▁podamos 1 +▁Financiación 1 +misión 1 +visión 1 +▁armadas 1 +▁denomina 1 +▁administrativo 1 +89 1 +▁llena 1 +▁incrementar 1 +▁aprobados 1 +▁hice 1 +▁realizan 1 +▁Lisboa 1 +▁garantice 1 +▁pertenece 1 +▁partido 1 +ity 1 +▁prioritaria 1 +▁Media 1 +chos 1 +fí 1 +ábamos 1 +▁fundamento 1 +09 1 +public 1 +▁fabricación 1 +▁destinado 1 +▁armado 1 +▁moderno 1 +▁terrestre 1 +▁j 1 +dura 1 +▁logra 1 +▁Hungría 1 +▁Pueblo 1 +reg 1 +eti 1 +▁baño 1 +▁Montenegro 1 +▁continúe 1 +▁excepcional 1 +▁CP 1 +▁jefes 1 +▁Saudita 1 +▁Arabia 1 +▁peso 1 +▁robot 1 +▁administrativa 1 +illas 1 +cionales 1 +▁planta 1 +parti 1 +▁velocidad 1 +▁represión 1 +tino 1 +▁define 1 +ug 1 +▁hermano 1 +cado 1 +▁respalda 1 +▁sanitaria 1 +▁Nepal 1 +▁obligatoria 1 +▁registrada 1 +▁religiosas 1 +▁desastre 1 +▁inteligente 1 +ted 1 +▁estimaciones 1 +▁(2004) 1 +▁impunidad 1 +▁Estudi 1 +igu 1 +▁Fe 1 +bur 1 +part 1 +▁conexión 1 +▁abajo 1 +▁aceptación 1 +▁gasto 1 +▁regla 1 +▁firmemente 1 +▁flexibilidad 1 +Sur 1 +▁contaminación 1 +ty 1 +▁ajusta 1 +▁requieren 1 +americano 1 +▁ocurrido 1 +▁individuos 1 +▁corta 1 +▁Podemos 1 +▁oposición 1 +▁obras 1 +eta 1 +▁inicia 1 +▁estándar 1 +▁Sri 1 +▁notificación 1 +▁llevan 1 +▁fallo 1 +▁vive 1 +▁previa 1 +▁asentamientos 1 +▁derecha 1 +▁corrientes 1 +▁límite 1 +▁coopera 1 +▁revisada 1 +▁coche 1 +▁infra 1 +▁IN 1 +▁plantas 1 +▁Movimiento 1 +▁Luego 1 +Leste 1 +▁reforz 1 +▁nación 1 +▁carbono 1 +▁subprograma 1 +▁VI 1 +▁contribuido 1 +/2001/ 1 +Ma 1 +▁consolidar 1 +▁insulares 1 +▁Cor 1 +▁desee 1 +▁conecta 1 +▁negativa 1 +▁ausencia 1 +▁perder 1 +▁nuevamente 1 +93 1 +▁nombres 1 +▁disponer 1 +▁conexos 1 +up 1 +▁Or 1 +▁formato 1 +▁Ber 1 +▁Ci 1 +ique 1 +53 1 +▁pudo 1 +gada 1 +▁artista 1 +▁profundamente 1 +ético 1 +▁poli 1 +▁comités 1 +55 1 +▁adopta 1 +▁is 1 +▁correcta 1 +▁exclusión 1 +▁inteligencia 1 +▁convencido 1 +▁interpreta 1 +íamos 1 +▁llamar 1 +VI 1 +▁primordial 1 +▁felicitar 1 +▁consecución 1 +▁diciendo 1 +▁usuario 1 +▁Eritrea 1 +▁comprar 1 +▁caja 1 +▁intolerancia 1 +▁ilegales 1 +▁recoge 1 +PA 1 +7) 1 +▁procura 1 +▁ciento 1 +Г 1 +▁manual 1 +▁privados 1 +rez 1 +▁Defensa 1 +▁viviendas 1 +▁cumplan 1 +AT 1 +▁equivalente 1 +end 1 +36 1 +▁Destaca 1 +▁futura 1 +▁pese 1 +▁votos 1 +ze 1 +ño 1 +▁específica 1 +vas 1 +▁pensamos 1 +▁movimientos 1 +▁menciona 1 +▁transmisión 1 +▁estrellas 1 +▁suya 1 +▁ventajas 1 +▁& 1 +cap 1 +▁pensamiento 1 +▁15.00 1 +▁tuvieron 1 +▁cláusula 1 +▁convención 1 +▁record 1 +▁PMA 1 +▁textos 1 +▁Decenio 1 +▁inmigrantes 1 +her 1 +▁consciente 1 +tic 1 +▁funcionar 1 +▁chino 1 +▁compañías 1 +▁instrucciones 1 +▁mantenga 1 +▁vuelve 1 +www 1 +▁recurrir 1 +85 1 +▁10.00 1 +caso 1 
+ub 1 +92 1 +▁ritmo 1 +▁anima 1 +▁Otra 1 +▁duplica 1 +▁error 1 +▁expresado 1 +▁Año 1 +▁serio 1 +ang 1 +▁asistir 1 +▁experimento 1 +▁palestina 1 +gh 1 +▁normativo 1 +▁Jordania 1 +▁retraso 1 +bli 1 +▁seguía 1 +q 1 +▁aplicando 1 +47 1 +▁identificación 1 +zos 1 +▁turismo 1 +▁orgánico 1 +▁sufrimiento 1 +▁integrante 1 +▁visual 1 +▁aportar 1 +▁analiza 1 +▁Común 1 +nacional 1 +▁creen 1 +▁Jefes 1 +▁Eslovenia 1 +▁61 1 +▁competente 1 +▁reconocido 1 +▁relativamente 1 +▁acelerar 1 +▁habida 1 +▁abogados 1 +/2004/ 1 +▁realizando 1 +▁consultar 1 +▁59 1 +▁colectivo 1 +▁Estadística 1 +▁transacciones 1 +65 1 +▁relacionada 1 +▁produce 1 +▁causado 1 +▁dando 1 +▁mirar 1 +AP 1 +SR 1 +▁vinculante 1 +▁vino 1 +▁garantizado 1 +▁formulado 1 +88 1 +▁diario 1 +ví 1 +▁utilizados 1 +activa 1 +▁sacar 1 +▁presentara 1 +▁exista 1 +▁UNFPA 1 +▁pantalla 1 +▁añadir 1 +▁val 1 +▁consideran 1 +▁oficio 1 +▁castigo 1 +▁defender 1 +▁digo 1 +▁esperamos 1 +▁Otro 1 +▁Actividades 1 +▁Procedimiento 1 +americana 1 +▁eficazmente 1 +▁fabrica 1 +43 1 +▁consideró 1 +▁patrocinadores 1 +▁Gar 1 +▁parcial 1 +▁temprana 1 +▁nacido 1 +▁sujeta 1 +▁estadounidenses 1 +▁detenido 1 +▁OIT 1 +▁autorizado 1 +▁Han 1 +72 1 +▁discu 1 +▁cuestionario 1 +imi 1 +▁simplifica 1 +imo 1 +▁actuación 1 +▁acusado 1 +▁Jerusalén 1 +▁Población 1 +▁Espero 1 +▁vacuna 1 +izó 1 +▁fotos 1 +▁mencionados 1 +▁impone 1 +▁carece 1 +In 1 +▁exigir 1 +▁específicamente 1 +/2002/ 1 +▁cuál 1 +ring 1 +▁pase 1 +63 1 +▁reconocida 1 +mentar 1 +ü 1 +▁sostenibilidad 1 +▁previo 1 +▁imparti 1 +39 1 +und 1 +ciente 1 +▁manifesta 1 +▁conveniente 1 +-2 1 +iano 1 +▁hoja 1 +▁preocupado 1 +▁exclusivamente 1 +▁certificado 1 +▁absoluto 1 +tización 1 +▁New 1 +▁Dado 1 +ológico 1 +▁toneladas 1 +98 1 +▁utilice 1 +▁dijeron 1 +IP 1 +▁Macedonia 1 +▁Párrafo 1 +▁Equipo 1 +▁vuelta 1 +▁declaró 1 +pp 1 +tec 1 +▁nu 1 +▁verde 1 +ST 1 +▁describe 1 +▁formuló 1 +▁priva 1 +muni 1 +▁extradición 1 +▁(2 1 +clar 1 +▁cliente 1 +▁lleve 1 +▁bal 1 +ker 1 +▁insolvencia 1 +▁pasando 1 +▁aplican 1 +▁Infancia 1 +▁cobertura 1 +▁St 1 +▁deporte 1 +Q 1 +▁precisamente 1 +ley 1 +▁regreso 1 +mm 1 +-3 1 +▁quedan 1 +ben 1 +▁prohíbe 1 +▁reparación 1 +▁castiga 1 +▁río 1 +▁elevada 1 +▁terminado 1 +▁debajo 1 +/2003/ 1 +siderablemente 1 +▁aceptable 1 +▁comunicado 1 +human 1 +▁motor 1 +▁memoria 1 +line 1 +▁Libia 1 +▁encima 1 +54 1 +▁fueran 1 +▁adelanto 1 +▁creemos 1 +▁Pueden 1 +▁básicamente 1 +▁cantidades 1 +▁convenido 1 +▁subregionales 1 +fina 1 +▁criminal 1 +▁sustancial 1 +▁saldo 1 +9% 1 +▁séptimo 1 +▁teoría 1 +▁domina 1 +▁Universal 1 +▁incumplimiento 1 +▁medioambiental 1 +▁redonda 1 +41 1 +▁pul 1 +imiento 1 +▁ocupados 1 +▁Terrorismo 1 +ética 1 +Pro 1 +▁intensificar 1 +▁pocas 1 +▁complementaria 1 +▁formar 1 +86 1 +▁piensa 1 +▁Camboya 1 +▁Senegal 1 +▁genocidio 1 +▁sufrido 1 +▁montaña 1 +▁convertirse 1 +cil 1 +▁peor 1 +▁parecen 1 +59 1 +▁aportan 1 +▁aprend 1 +▁cama 1 +▁energética 1 +▁Reafirmando 1 +▁ganar 1 +▁diplomática 1 +▁participado 1 +cid 1 +▁publicaciones 1 +▁Investigación 1 +▁dimensión 1 +▁vale 1 +▁herramienta 1 +▁establecida 1 +dec 1 +▁estuviera 1 +aña 1 +▁Administrativos 1 +MIN 1 +▁encuentre 1 +▁fotografía 1 +▁autora 1 +▁periódicamente 1 +▁lenguaje 1 +celera 1 +▁enfoques 1 +▁empieza 1 +▁Tortura 1 +▁rendición 1 +▁separación 1 +▁TED 1 +▁vías 1 +▁500 1 +▁actitud 1 +ult 1 +▁Racial 1 +▁rodea 1 +▁2) 1 +▁visitar 1 +46 1 +▁máquina 1 +▁etapas 1 +▁asesor 1 +▁apruebe 1 +▁estaría 1 +▁modificación 1 +▁operacional 1 +Firmado 1 +▁gubernamental 1 +▁reclamante 1 +mico 1 +▁formal 1 +▁agrega 1 +▁reproductiva 1 +▁contactos 1 +▁alternativas 1 +▁perdido 1 +cieron 1 
+pondrá 1 +8) 1 +▁impuesto 1 +tividad 1 +▁programación 1 +tiende 1 +▁defensores 1 +ell 1 +▁desempeñan 1 +▁proporcionado 1 +▁Fundación 1 +▁gradual 1 +▁Beijing 1 +▁Lituania 1 +64 1 +▁Otras 1 +cent 1 +▁conseguido 1 +ef 1 +▁iii 1 +▁Queda 1 +▁actuaciones 1 +▁campos 1 +2/ 1 +tema 1 +bel 1 +▁jefe 1 +▁interesa 1 +▁captura 1 +▁brindar 1 +▁XXI 1 +▁cuentan 1 +▁We 1 +oro 1 +▁década 1 +▁elegido 1 +▁futuros 1 +▁cartas 1 +▁presentará 1 +▁documenta 1 +▁presupuestarias 1 +▁estatal 1 +▁extraordinaria 1 +▁inquietud 1 +▁Invita 1 +▁considere 1 +▁barrio 1 +▁clases 1 +▁movilización 1 +▁hiciera 1 +/2005/ 1 +▁ignora 1 +▁enuncia 1 +▁viable 1 +▁bebé 1 += 1 +▁correcto 1 +▁vigésimo 1 +▁renovable 1 +▁participen 1 +42 1 +▁deficiencias 1 +▁convenciones 1 +▁compatible 1 +▁páginas 1 +▁Familia 1 +Cuál 1 +▁Viet 1 +▁ampliamente 1 +▁esforz 1 +– 1 +▁corresponda 1 +▁erradicar 1 +▁exámenes 1 +▁legítima 1 +▁obliga 1 +tur 1 +▁renta 1 +vid 1 +▁histórica 1 +IV 1 +▁preparativos 1 +▁negocio 1 +ece 1 +▁productores 1 +▁absolutamente 1 +▁Incluso 1 +▁quedar 1 +-19 1 +▁aclara 1 +bul 1 +▁haberse 1 +▁ruso 1 +gal 1 +▁: 1 +▁intensifica 1 +CT 1 +▁bajos 1 +56 1 +▁flexible 1 +▁muestran 1 +▁arbitraria 1 +▁Usted 1 +dra 1 +▁Sírvanse 1 +pusieron 1 +▁Acta 1 +▁caracteriza 1 +58 1 +▁agrava 1 +91 1 +▁300 1 +▁patrimonio 1 +▁enfrentar 1 +ear 1 +▁laborales 1 +▁Él 1 +COM 1 +/2006/ 1 +▁detalle 1 +▁adoptó 1 +▁agresión 1 +tuvieron 1 +▁somet 1 +▁manifestaciones 1 +▁Reafirma 1 +▁siguió 1 +▁Chi 1 +▁hubieran 1 +▁2009. 1 +▁director 1 +pan 1 +▁interacción 1 +ux 1 +▁amigo 1 +▁archivos 1 +▁frase 1 +▁creer 1 +▁Capítulo 1 +▁escribir 1 +▁subsidio 1 +▁excesiva 1 +▁detener 1 +▁Armenia 1 +▁Ghana 1 +▁acogida 1 +▁regímenes 1 +▁1) 1 +▁errores 1 +▁monto 1 +II 1 +▁prioritario 1 +▁juegos 1 +▁preguntar 1 +▁sustantiva 1 +ifi 1 +▁tú 1 +tán 1 +▁kilómetros 1 +▁convierte 1 +▁conceder 1 +▁diga 1 +▁Sch 1 +▁designado 1 +mal 1 +▁comparte 1 +▁modificaciones 1 +▁preferencia 1 +▁cuarta 1 +▁ataca 1 +▁basadas 1 +gráfica 1 +▁Bolivia 1 +▁impide 1 +▁Documento 1 +▁Comisionada 1 +▁alternativo 1 +cepción 1 +▁discursos 1 +▁Energía 1 +▁adquirida 1 +▁apliquen 1 +ístico 1 +han 1 +▁urbano 1 +▁empleado 1 +-4 1 +▁saneamiento 1 +▁OIEA 1 +▁armonización 1 +▁ido 1 +▁adquirir 1 +▁sencilla 1 +▁llegue 1 +▁recibe 1 +▁Verde 1 +▁posteriores 1 +▁acredita 1 +▁competitividad 1 +lio 1 +▁sólida 1 +▁Agricultura 1 +▁alerta 1 +▁hicimos 1 +▁Chad 1 +osas 1 +▁inscripción 1 +bio 1 +rimi 1 +▁Superior 1 +▁últimas 1 +▁pensé 1 +▁Salvador 1 +▁altura 1 +▁humanitarios 1 +▁realizó 1 +▁materna 1 +▁plantear 1 +▁ll 1 +▁aumentando 1 +▁reciba 1 +▁tienes 1 +▁difundir 1 +▁pasos 1 +▁existía 1 +reci 1 +▁margen 1 +▁convencionales 1 +▁invitación 1 +▁tolerancia 1 +ólogo 1 +▁pan 1 +▁Caja 1 +▁comienza 1 +▁facilite 1 +▁tecnológica 1 +▁municiones 1 +▁libres 1 +▁ACNUDH 1 +▁continuo 1 +▁periódica 1 +▁anuncio 1 +▁America 1 +▁octavo 1 +ak 1 +06 1 +▁ganado 1 +▁Qui 1 +▁Reconoce 1 +▁Solo 1 +▁Esperamos 1 +▁peces 1 +▁opone 1 +▁Interna 1 +▁enjuiciamiento 1 +▁colaborar 1 +▁flor 1 +5.000 1 +▁Uruguay 1 +▁sanitario 1 +▁concretamente 1 +▁tecnológico 1 +▁corriente 1 +▁descarga 1 +▁avanzado 1 +▁tardar 1 +▁avión 1 +▁expuesto 1 +▁posiciones 1 +▁reside 1 +▁Alienta 1 +▁concluido 1 +▁informativa 1 +▁vigente 1 +▁dinámica 1 +▁riqueza 1 +▁instalación 1 +▁préstamos 1 +▁mencionadas 1 +▁adecuadamente 1 +▁transnacional 1 +▁prácticamente 1 +▁déficit 1 +▁enormes 1 +versión 1 +▁aclarar 1 +▁ciudadano 1 +shi 1 +▁antiguo 1 +log 1 +contra 1 +▁azul 1 +▁mon 1 +▁observador 1 +ponga 1 +▁doméstica 1 +▁pertenecientes 1 +▁secreto 1 +▁liberalización 1 +▁Orden 1 +▁culturas 1 +▁Dr 1 +▁Objetivos 1 
+net 1 +▁descubrir 1 +▁Exp 1 +▁tro 1 +mil 1 +▁temático 1 +▁Pas 1 +lico 1 +▁atentado 1 +uro 1 +49 1 +▁manifiesta 1 +▁revista 1 +▁millón 1 +volución 1 +arias 1 +▁culpable 1 +glo 1 +tem 1 +▁conserva 1 +▁Doha 1 +▁dia 1 +▁tele 1 +▁fácilmente 1 +▁autonomía 1 +▁movilidad 1 +▁aplicarse 1 +▁enumera 1 +▁ministerios 1 +▁Moldova 1 +▁solucionar 1 +▁Día 1 +▁sorprend 1 +▁quedado 1 +mple 1 +licit 1 +▁buques 1 +▁plazos 1 +▁tome 1 +fort 1 +▁http 1 +’ 1 +▁propuso 1 +▁arreglos 1 +▁solicitantes 1 +▁Q 1 +▁finalizar 1 +▁alojamiento 1 +▁electricidad 1 +▁apenas 1 +▁jugar 1 +▁prestan 1 +▁asignar 1 +▁basados 1 +▁subrayar 1 +▁uniforme 1 +IM 1 +▁comunica 1 +▁presentaron 1 +nova 1 +▁patente 1 +▁imaginar 1 +▁planteamiento 1 +▁quiera 1 +▁normalmente 1 +▁estadounidense 1 +RO 1 +▁lamentable 1 +▁che 1 +▁enviado 1 +ani 1 +▁ponen 1 +▁art 1 +▁dispuesta 1 +▁maestros 1 +▁FAO 1 +▁prácticos 1 +▁controversias 1 +▁UNESCO 1 +▁responde 1 +▁resta 1 +▁suelen 1 +▁50% 1 +▁científico 1 +▁creados 1 +▁legisla 1 +▁complejidad 1 +▁expulsión 1 +▁gasta 1 +▁apropiada 1 +comp 1 +▁avanza 1 +▁Londres 1 +▁casas 1 +82 1 +▁hija 1 +▁alega 1 +▁Eslovaquia 1 +▁recomendar 1 +▁Debería 1 +▁cálculo 1 +▁movilizar 1 +▁charla 1 +▁corte 1 +▁vigilar 1 +62 1 +▁don 1 +▁Tom 1 +▁niña 1 +▁ministerial 1 +▁pensando 1 +▁envío 1 +stitui 1 +▁retirada 1 +▁regresar 1 +▁amplias 1 +▁2006-2007 1 +▁(1999) 1 +▁Constitucional 1 +dina 1 +▁Euro 1 +▁Tecnología 1 +▁adaptar 1 +▁rige 1 +▁recuperar 1 +NA 1 +▁vigentes 1 +▁Tanzanía 1 +▁encontrado 1 +▁Ban 1 +▁madres 1 +▁Coordinador 1 +▁pareja 1 +▁sorprendente 1 +▁imparcial 1 +▁específico 1 +▁Kazajstán 1 +▁xenofobia 1 +▁pensiones 1 +dy 1 +▁declarado 1 +▁bilateral 1 +▁distinción 1 +▁utilidad 1 +US 1 +cí 1 +mun 1 +▁dirige 1 +08 1 +▁Integra 1 +▁creencias 1 +▁agrado 1 +col 1 +▁enfrentan 1 +Puedo 1 +▁verano 1 +▁urbanas 1 +▁oído 1 +▁dispuestos 1 +▁Culturales 1 +▁Nam 1 +▁registra 1 +▁solar 1 +▁cumple 1 +▁Observando 1 +▁defini 1 +ction 1 +▁obstaculiza 1 +▁socorro 1 +▁Principios 1 +▁ilícitos 1 +▁práctico 1 +▁aniversario 1 +▁receptor 1 +▁escenario 1 +gri 1 +vesti 1 +adores 1 +▁listas 1 +mia 1 +▁abandona 1 +▁Comisaria 1 +▁biblioteca 1 +▁diaria 1 +▁Segundo 1 +▁mezcla 1 +▁cuadr 1 +▁invitó 1 +▁mantenido 1 +▁extranjera 1 +▁Malta 1 +ight 1 +▁foros 1 +▁combinación 1 +▁municipio 1 +▁Resulta 1 +▁armamentos 1 +▁virus 1 +▁expresaron 1 +▁explicación 1 +▁alternativa 1 +▁etiqueta 1 +▁ocupar 1 +▁adquisiciones 1 +▁discurso 1 +▁almacenamiento 1 +▁venido 1 +▁Letonia 1 +▁Albania 1 +▁quieres 1 +▁recepción 1 +▁Partido 1 +▁legítimo 1 +▁aviones 1 +▁fijado 1 +▁concluye 1 +▁John 1 +▁exigencias 1 +▁considerables 1 +ham 1 +▁registros 1 +▁verdaderamente 1 +▁inició 1 +▁odio 1 +tual 1 +▁étnicas 1 +▁apartamento 1 +▁universo 1 +▁libremente 1 +ET 1 +▁barrera 1 +▁rela 1 +▁consonancia 1 +▁crítico 1 +▁financiado 1 +▁(2000) 1 +▁Camerún 1 +▁Nairobi 1 +▁propietario 1 +▁tuve 1 +▁salvaguardias 1 +▁postura 1 +▁disco 1 +▁ubicación 1 +52 1 +lip 1 +▁preliminar 1 +▁imagina 1 +▁construye 1 +▁esposa 1 +▁concede 1 +▁Mont 1 +▁RE 1 +▁aprovecha 1 +▁serían 1 +▁63 1 +▁religiosa 1 +▁Ben 1 +▁satisfactoria 1 +▁fraude 1 +mita 1 +▁dada 1 +▁oral 1 +▁blanqueo 1 +▁ja 1 +▁transforma 1 +▁agradecer 1 +▁traslado 1 +▁retirar 1 +▁participaron 1 +▁preste 1 +▁cubrir 1 +▁expresamente 1 +▁envía 1 +▁constantemente 1 +▁mundialización 1 +▁parque 1 +▁aumentó 1 +▁aceptado 1 +▁Bri 1 +▁Col 1 +MO 1 +▁temor 1 +▁colega 1 +terna 1 +▁hubiese 1 +▁sentimiento 1 +▁rom 1 +▁dedicada 1 +▁Trans 1 +▁ensayos 1 +▁beneficiarios 1 +▁siento 1 +▁agenda 1 +▁equivoca 1 +▁piensan 1 +osos 1 +è 1 +ánico 1 +▁medir 1 +if 1 +▁investigadores 1 +▁invertir 1 
+▁madera 1 +57 1 +▁Mauricio 1 +▁evidencia 1 +▁instrucción 1 +▁impulsar 1 +▁acusados 1 +▁gravedad 1 +▁comentario 1 +virt 1 +▁mensajes 1 +▁reconoció 1 +▁corporal 1 +▁dieron 1 +▁ciudadanía 1 +▁sangre 1 +▁Necesitamos 1 +▁profesores 1 +▁Singapur 1 +▁voluntarios 1 +Original 1 +▁Camp 1 +▁terrible 1 +▁disponga 1 +tz 1 +ulación 1 +▁vea 1 +▁definido 1 +▁É 1 +▁2004-2005 1 +▁Luxemburgo 1 +▁tipifica 1 +▁Vol 1 +eccion 1 +▁desplazadas 1 +▁generaciones 1 +▁desmovilización 1 +▁aparato 1 +▁interino 1 +▁lamenta 1 +▁jugador 1 +▁logrados 1 +▁centrales 1 +greso 1 +▁exporta 1 +ek 1 +▁Recomendación 1 +▁concertado 1 +7/ 1 +▁titulada 1 +tenta 1 +▁Cuenta 1 +▁centrar 1 +▁geo 1 +▁Bruselas 1 +▁Islandia 1 +▁Subraya 1 +▁dimensiones 1 +▁abre 1 +▁limpia 1 +▁hecha 1 +▁presunta 1 +▁periodo 1 +▁expectativas 1 +LA 1 +▁Fuerzas 1 +UE 1 +PI 1 +▁desean 1 +▁arquitectura 1 +▁productividad 1 +▁invoca 1 +ciencia 1 +▁banda 1 +▁credibilidad 1 +▁ninguno 1 +▁organizó 1 +▁actúa 1 +▁externos 1 +▁noveno 1 +rc 1 +▁pilar 1 +▁Kyoto 1 +▁constructivo 1 +▁Decreto 1 +ándole 1 +▁cambiado 1 +▁ejemplar 1 +▁Habid 1 +▁verbal 1 +▁Za 1 +▁inaceptable 1 +▁detallado 1 +▁necesariamente 1 +▁proponer 1 +▁» 1 +▁facilitado 1 +WG 1 +▁maneja 1 +▁derivados 1 +▁crimen 1 +▁comer 1 +▁dudas 1 +▁Qatar 1 +▁sanciona 1 +▁extremadamente 1 +▁entró 1 +▁tercero 1 +▁cuáles 1 +▁enseña 1 +▁Liga 1 +tieron 1 +▁cohesión 1 +▁empezó 1 +▁parecía 1 +▁protesta 1 +▁incidentes 1 +▁deposit 1 +▁Uzbekistán 1 +▁promueve 1 +▁preservar 1 +▁suministros 1 +▁promueva 1 +▁investiga 1 +iese 1 +lógico 1 +▁significado 1 +ducido 1 +▁encomia 1 +lli 1 +▁detectar 1 +▁Estonia 1 +▁salvar 1 +for 1 +Bissau 1 +▁sufragar 1 +lares 1 +▁piezas 1 +▁especializada 1 +iones 1 +▁imperio 1 +▁contraído 1 +zu 1 +▁cuán 1 +by 1 +▁Espacio 1 +ónica 1 +▁autónomos 1 +▁interesado 1 +▁socios 1 +tive 1 +SI 1 +▁ratificar 1 +▁tercio 1 +▁radical 1 +▁genética 1 +▁obtiene 1 +▁destacó 1 +▁consideraciones 1 +▁creando 1 +párr 1 +▁Nicaragua 1 +▁PIB 1 +▁subvenciones 1 +▁mutuo 1 +▁aérea 1 +▁complementa 1 +▁círculo 1 +▁remuneración 1 +▁contribuya 1 +▁transporta 1 +▁utilizada 1 +▁existir 1 +▁Modelo 1 +▁13.00 1 +pol 1 +▁clasifica 1 +▁2008-2009 1 +▁expansión 1 +▁poderes 1 +▁alcanza 1 +▁Varios 1 +▁plat 1 +▁renuncia 1 +▁actualizada 1 +▁planteado 1 +véanse 1 +▁religiones 1 +▁vacantes 1 +▁apoye 1 +▁gal 1 +▁Jurídicos 1 +▁trabajado 1 +▁estación 1 +quí 1 +▁sometida 1 +▁págs 1 +▁químicas 1 +▁alcanzados 1 +▁Ministerial 1 +▁verse 1 +▁sujetos 1 +▁coordinada 1 +3/ 1 +Qu 1 +▁subsidiarios 1 +▁cerrado 1 +▁clic 1 +▁Podría 1 +▁sólido 1 +▁piloto 1 +▁CON 1 +▁actores 1 +mpi 1 +ö 1 +▁comprobar 1 +▁importación 1 +▁Río 1 +▁reúne 1 +▁mejorado 1 +▁barco 1 +▁peticiones 1 +▁buscando 1 +▁Roja 1 +▁Sociedad 1 +▁embarazo 1 +▁recuerdo 1 +▁vídeo 1 +▁reclutamiento 1 +▁profesor 1 +struct 1 +▁titular 1 +▁británico 1 +▁silencio 1 +▁apelación 1 +▁debatir 1 +tric 1 +▁pacíficos 1 +▁Quizá 1 +▁cruza 1 +▁estructurales 1 +▁plantean 1 +▁aleja 1 +▁golpe 1 +▁cuánto 1 +ándolo 1 +ndar 1 +▁Reco 1 +▁monta 1 +▁Ciudad 1 +▁piedra 1 +▁extremo 1 +tch 1 +ENT 1 +▁elimina 1 +▁producen 1 +ung 1 +▁atmósfera 1 +▁femenina 1 +▁respectivas 1 +▁recauda 1 +▁sostiene 1 +▁secciones 1 +▁dedicar 1 +▁nombrado 1 +gina 1 +har 1 +▁universidades 1 +▁útiles 1 +▁Faso 1 +▁Consulta 1 +▁impulsa 1 +▁aprueba 1 +▁Dos 1 +bal 1 +▁destinada 1 +▁temperatura 1 +▁Ante 1 +▁precedentes 1 +▁romaníes 1 +▁libera 1 +▁Salón 1 +iti 1 +▁OSSI 1 +▁sugiere 1 +cula 1 +▁Toda 1 +▁relacionado 1 +▁apoyado 1 +rina 1 +▁recibidas 1 +▁convenio 1 +▁impuestas 1 +▁Cruz 1 +▁negativas 1 +▁Soy 1 +▁constructiva 1 +▁periodistas 1 +▁conoce 1 +peri 1 +▁Plataforma 1 
+▁conducir 1 +▁legislativa 1 +▁Mujeres 1 +▁claridad 1 +▁asumido 1 +▁ocasiona 1 +▁conocida 1 +cultural 1 +▁eres 1 +▁ocurrió 1 +▁fracaso 1 +▁masiva 1 +▁deseen 1 +uar 1 +▁ordenamiento 1 +▁Unidad 1 +▁religiosos 1 +SO 1 +▁flujo 1 +▁muro 1 +▁75 1 +▁aquellas 1 +TO 1 +Me 1 +▁CD 1 +IG 1 +▁Tre 1 +ificó 1 +▁propósitos 1 +▁orgánica 1 +▁sugirió 1 +▁cometidas 1 +▁afrontar 1 +▁premio 1 +▁consagra 1 +▁hago 1 +CH 1 +▁bienvenida 1 +▁opina 1 +▁división 1 +▁socio 1 +cy 1 +▁Defensor 1 +▁impresión 1 +▁limitación 1 +2005 1 +▁positivas 1 +▁supera 1 +▁Yugoslava 1 +▁Habie 1 +▁detrás 1 +▁nave 1 +venga 1 +pet 1 +▁elegidos 1 +ik 1 +▁fomenta 1 +CRC 1 +▁Dicha 1 +** 1 +▁encuentro 1 +vivi 1 +ich 1 +▁Primero 1 +▁conscientes 1 +▁Gal 1 +▁Unida 1 +ft 1 +▁Sé 1 +▁Th 1 +▁Finalmente 1 +▁epidemia 1 +▁ayude 1 +▁comparti 1 +▁Imp 1 +▁viendo 1 +▁repatriación 1 +▁cierre 1 +▁combate 1 +▁aumente 1 +▁Panamá 1 +▁terrorista 1 +▁ADN 1 +▁décadas 1 +▁suspensión 1 +▁oradora 1 +▁depósito 1 +Los 1 +▁explosivos 1 +▁subregional 1 +▁1989 1 +sistir 1 +▁probar 1 +▁antecedentes 1 +▁oportuno 1 +▁probabilidad 1 +▁consideramos 1 +▁experto 1 +▁persistente 1 +▁informaciones 1 +▁Zambia 1 +▁océanos 1 +▁paciente 1 +▁hospitales 1 +▁Google 1 +▁objeciones 1 +▁Sexta 1 +▁Había 1 +▁sufren 1 +▁centrado 1 +%) 1 +▁socava 1 +▁reconocidos 1 +▁proporcionada 1 +▁resistencia 1 +▁publicó 1 +▁vacaciones 1 +▁ahorro 1 +▁industriales 1 +▁establecen 1 +cogiendo 1 +▁retorno 1 +▁Estaba 1 +▁culpa 1 +▁Posteriormente 1 +▁parlamentario 1 +▁reemplaza 1 +▁transfronterizo 1 +▁acreedores 1 +iana 1 +▁consultiva 1 +▁convino 1 +▁ocuparse 1 +▁extranjeras 1 +▁inspección 1 +▁Yemen 1 +▁carne 1 +▁obligatorio 1 +TI 1 +▁adoptando 1 +▁inventario 1 +▁Cri 1 +▁finanzas 1 +ige 1 +▁Emp 1 +plaza 1 +▁alquiler 1 +▁diseñar 1 +iéramos 1 +▁véase 1 +▁tren 1 +▁bruto 1 +▁universidad 1 +▁obligados 1 +▁transformación 1 +▁Investigaciones 1 +▁Jamaica 1 +▁escuchado 1 +▁complica 1 +lecomunicaciones 1 +▁Igualdad 1 +▁inmunidad 1 +pone 1 +▁académico 1 +▁perfil 1 +▁costes 1 +▁utilizarse 1 +▁gases 1 +▁cura 1 +▁(2003) 1 +▁Debido 1 +▁débil 1 +▁usan 1 +2006 1 +▁corresponden 1 +▁dará 1 +▁provincias 1 +6/ 1 +▁recomendado 1 +▁Guía 1 +▁Gra 1 +▁fiable 1 +▁Reitera 1 +▁publicidad 1 +▁gráfico 1 +▁prepara 1 +▁feliz 1 +▁migratorios 1 +▁cumplen 1 +▁Pese 1 +▁policial 1 +▁Lista 1 +▁generalmente 1 +▁campamentos 1 +▁estricta 1 +▁litoral 1 +ACIÓN 1 +▁requerir 1 +▁asisten 1 +▁presos 1 +▁voluntario 1 +▁estancia 1 +▁DEL 1 +▁emprender 1 +▁agrupa 1 +MP 1 +▁compensa 1 +▁obligado 1 +▁previas 1 +▁apropiados 1 +▁extraño 1 +▁rapidez 1 +▁Bra 1 +▁veo 1 +▁traducción 1 +▁cero 1 +▁Madrid 1 +▁asiento 1 +▁disponen 1 +▁revolución 1 +▁Consolidación 1 +▁accesible 1 +▁ventaja 1 +▁Malí 1 +▁compañero 1 +▁vuelva 1 +▁orientado 1 +▁cumpli 1 +▁Burkina 1 +▁intervenir 1 +▁encarcela 1 +▁Che 1 +▁Observación 1 +▁10% 1 +▁Gi 1 +burg 1 +fico 1 +▁delante 1 +▁65 1 +▁Centroafricana 1 +▁aportaciones 1 +▁adquirido 1 +▁refieren 1 +▁afectar 1 +▁relaciona 1 +▁intercambiar 1 +▁ayer 1 +▁piel 1 +▁traslad 1 +▁supervivencia 1 +▁Participa 1 +▁20% 1 +zona 1 +▁irre 1 +▁exteriores 1 +▁2015 1 +ificar 1 +▁Chris 1 +▁Evaluación 1 +▁máquinas 1 +▁animal 1 +▁Law 1 +▁trataba 1 +▁Nosotros 1 +▁Zimbabwe 1 +▁viejo 1 +▁tradición 1 +▁55/2 1 +▁reglamentación 1 +▁Dicho 1 +▁negativos 1 +▁situada 1 +▁mensual 1 +▁permitiría 1 +▁examinará 1 +▁felicita 1 +▁abandonar 1 +▁compuesto 1 +▁escasa 1 +▁Oficial 1 +uri 1 +▁1373 1 +▁indirecta 1 +▁inciso 1 +PL 1 +▁vínculo 1 +▁indebido 1 +▁Transición 1 +▁imposición 1 +▁reacción 1 +▁mover 1 +▁ecosistemas 1 +▁océano 1 +▁brecha 1 +and 1 +▁inclusive 1 +▁incumbe 1 +▁cesación 1 +▁Fra 
1 +▁llevando 1 +▁marina 1 +▁Puerto 1 +/2007/ 1 +▁asocia 1 +▁And 1 +OL 1 +▁pared 1 +▁Independiente 1 +▁peligrosos 1 +▁operativo 1 +▁Washington 1 +▁gratuito 1 +▁viernes 1 +▁recién 1 +@ 1 +▁provisionales 1 +▁iniciales 1 +▁ejerce 1 +ku 1 +pto 1 +▁permitió 1 +▁equidad 1 +▁siguiera 1 +▁Hoteles 1 +▁cercano 1 +▁constitución 1 +▁escolares 1 +▁viva 1 +▁llegada 1 +▁2002-2003 1 +▁Agencia 1 +▁Control 1 +▁declarar 1 +▁Benin 1 +▁Lucha 1 +▁identifica 1 +▁cielo 1 +RES 1 +▁Ven 1 +▁fuese 1 +▁liberación 1 +▁desigualdad 1 +▁estarán 1 +ther 1 +▁Nivel 1 +▁marítimo 1 +▁tropas 1 +▁sensible 1 +▁(2006) 1 +▁parlamentaria 1 +▁Registro 1 +▁apunta 1 +▁Haya 1 +▁encontraba 1 +▁(2005) 1 +▁escrita 1 +mí 1 +▁Liechtenstein 1 +ure 1 +ática 1 +▁incentivos 1 +▁150 1 +▁aplique 1 +▁Celebra 1 +▁discriminatoria 1 +LE 1 +▁regulación 1 +▁embarazada 1 +▁proseguir 1 +▁gira 1 +▁pasada 1 +▁ética 1 +▁servidor 1 +▁residuos 1 +▁cancela 1 +▁privación 1 +▁duro 1 +▁participó 1 +▁formulario 1 +▁contribuirá 1 +▁1.000 1 +venta 1 +▁presentarse 1 +▁Directora 1 +poli 1 +▁18.00 1 +ONU 1 +▁alianzas 1 +IL 1 +▁aporte 1 +▁juvenil 1 +▁Cambio 1 +figura 1 +▁coordinado 1 +▁considerando 1 +▁desertificación 1 +▁ilustra 1 +▁protegido 1 +▁propicia 1 +▁describir 1 +▁enseñar 1 +▁ricos 1 +▁Fuente 1 +▁degradantes 1 +▁jurisprudencia 1 +▁matemática 1 +▁Durban 1 +▁1980 1 +▁viajar 1 +cito 1 +▁juventud 1 +▁contenidas 1 +▁Formas 1 +▁determinada 1 +▁procesamiento 1 +▁actualización 1 +sent 1 +▁Indígenas 1 +▁avanzada 1 +▁destacado 1 +▁antiguos 1 +▁diplomático 1 +▁sostenido 1 +▁sensación 1 +▁elabora 1 +▁cumpla 1 +▁núcleo 1 +▁sequía 1 +Si 1 +▁creada 1 +▁empecé 1 +▁tomen 1 +éis 1 +▁facultades 1 +CONF 1 +▁desarrollando 1 +▁apoyando 1 +▁evoluciona 1 +▁denominado 1 +▁contienen 1 +IF 1 +▁asegurarse 1 +▁ponerse 1 +▁Dominicana 1 +▁estadísticos 1 +▁comenzado 1 +▁secuestro 1 +dujeron 1 +▁divulgación 1 +▁encaminados 1 +▁perjudicial 1 +▁recibieron 1 +▁subrayó 1 +▁presten 1 +▁gar 1 +▁permanecer 1 +▁hechas 1 +▁obstáculo 1 +▁facultad 1 +▁Mc 1 +▁memorando 1 +▁expediente 1 +ducción 1 +▁dispositivo 1 +▁magnitud 1 +▁perjudica 1 +▁distribuido 1 +▁olvidar 1 +▁capitales 1 +▁reclama 1 +▁vehículo 1 +▁Casa 1 +▁respaldo 1 +▁décimo 1 +▁with 1 +▁64 1 +▁Siempre 1 +▁delincuentes 1 +▁africana 1 +DP 1 +▁Están 1 +▁observado 1 +▁Monterrey 1 +▁asesina 1 +king 1 +cara 1 +▁presunto 1 +▁lunes 1 +▁cárcel 1 +▁goce 1 +▁módulo 1 +▁escasez 1 +▁asciende 1 +▁regresa 1 +▁Tenía 1 +▁neuro 1 +▁iraquíes 1 +▁dispositivos 1 +▁ubica 1 +▁consideren 1 +▁Barcelona 1 +▁incidencia 1 +pel 1 +▁piso 1 +▁poseedores 1 +dez 1 +▁potable 1 +▁subtema 1 +▁departamento 1 +▁Ku 1 +▁ajustar 1 +▁sencillo 1 +▁considero 1 +cuerda 1 +▁banco 1 +▁Estupefacientes 1 +▁energético 1 +▁lógica 1 +▁tira 1 +▁necesitaba 1 +▁perfectamente 1 +▁ministros 1 +▁testimonio 1 +▁asegure 1 +▁Provisional 1 +▁cónyuge 1 +▁solicite 1 +▁Final 1 +▁líder 1 +▁retira 1 +2.000 1 +▁perfeccion 1 +Estados 1 +▁mejorando 1 +▁resumida 1 +cabeza 1 +▁iguales 1 +marca 1 +CD 1 +▁sugerencias 1 +▁Namibia 1 +▁promoviendo 1 +▁disfruta 1 +cierto 1 +▁generado 1 +▁accidentes 1 +ate 1 +▁socioeconómico 1 +▁resultantes 1 +ducto 1 +▁realista 1 +▁mutuamente 1 +▁iglesia 1 +▁posesión 1 +FOR 1 +▁habita 1 +▁TV 1 +▁Vo 1 +▁brillante 1 +▁prosperidad 1 +▁pornografía 1 +▁crece 1 +▁secreta 1 +▁pens 1 +▁maternidad 1 +▁afirmó 1 +moni 1 +▁acabar 1 +▁admisibilidad 1 +ture 1 +▁VII 1 +▁docente 1 +▁cuidados 1 +▁Jamahiriya 1 +▁estábamos 1 +▁interesada 1 +▁gestionar 1 +poner 1 +▁chica 1 +▁Documentación 1 +▁inhumanos 1 +▁progresiva 1 +▁vuelto 1 +▁decisivo 1 +▁valioso 1 +▁plaza 1 +▁controversia 1 +▁Delincuencia 1 +▁orgullo 
1 +▁Paul 1 +▁suscita 1 +▁autorizada 1 +▁funcione 1 +▁contabilidad 1 +▁Marte 1 +▁consumidor 1 +▁proporcionan 1 +lau 1 +▁almacena 1 +▁señales 1 +▁llamamos 1 +EM 1 +▁deudor 1 +.4/ 1 +▁reunió 1 +▁desearía 1 +▁penitenciario 1 +▁árboles 1 +▁informático 1 +▁sospechoso 1 +▁valiosa 1 +▁report 1 +▁decreto 1 +▁agencias 1 +▁EL 1 +▁oficialmente 1 +▁OMS 1 +▁forzada 1 +▁vota 1 +▁propiedades 1 +▁prohibido 1 +▁Honduras 1 +▁equitativo 1 +▁Public 1 +▁Considera 1 +▁celebre 1 +▁empeño 1 +gun 1 +▁mini 1 +▁definitivo 1 +▁sentado 1 +▁crueles 1 +spir 1 +tiene 1 +▁Normas 1 +lón 1 +▁acreedor 1 +▁mutua 1 +▁MONUC 1 +▁diamantes 1 +▁fábrica 1 +▁parlamento 1 +▁órdenes 1 +▁sindicatos 1 +▁vender 1 +▁belleza 1 +▁orientaciones 1 +▁lanzamiento 1 +▁condiciona 1 +▁fiscalización 1 +▁directivo 1 +▁óptima 1 +▁corregir 1 +▁incluía 1 +ling 1 +▁Observaciones 1 +NI 1 +▁hablamos 1 +▁estudiando 1 +DH 1 +▁extensión 1 +▁prohibir 1 +▁EN 1 +▁aspira 1 +▁sueldos 1 +CR 1 +/10 1 +▁especialistas 1 +▁esclavitud 1 +▁café 1 +▁modificado 1 +▁experimental 1 +Di 1 +▁productivo 1 +▁estadística 1 +▁justificar 1 +▁(2002) 1 +▁consultivo 1 +▁ingeniería 1 +▁estrictamente 1 +▁cerrar 1 +▁inspeccion 1 +▁negociar 1 +cular 1 +▁agradecería 1 +▁estricto 1 +▁curs 1 +“ 1 +▁cometer 1 +mero 1 +▁talleres 1 +▁SE 1 +▁siguiendo 1 +▁legitimidad 1 +▁oculta 1 +▁Deseo 1 +▁unilateral 1 +▁situ 1 +▁pesquera 1 +▁reclusos 1 +▁tejido 1 +▁pensaba 1 +▁exclusiva 1 +▁compro 1 +▁Tayikistán 1 +ité 1 +stituye 1 +▁1988 1 +▁preparando 1 +▁World 1 +▁paisaje 1 +▁absoluta 1 +iller 1 +▁enunciados 1 +▁fiduciario 1 +▁restablecer 1 +▁genial 1 +▁inevitable 1 +▁tarifa 1 +▁Kar 1 +▁manifestar 1 +▁indemniza 1 +/64/ 1 +▁complementario 1 +▁recordó 1 +▁deterioro 1 +FR 1 +▁visado 1 +▁Fer 1 +▁fórmula 1 +▁psicológica 1 +▁cultiva 1 +izo 1 +▁actitudes 1 +▁pelo 1 +▁Federativa 1 +▁invasión 1 +▁mostrado 1 +▁teníamos 1 +▁Consultivo 1 +▁mono 1 +▁Regla 1 +▁diagnóstico 1 +▁proyecta 1 +▁septentrional 1 +▁decisiva 1 +▁Respect 1 +▁municipales 1 +ish 1 +▁Libertad 1 +▁reiterar 1 +▁convocar 1 +▁ventana 1 +goberna 1 +▁Fiscalía 1 +▁Mozambique 1 +▁constituía 1 +▁lujo 1 +izaciones 1 +-5 1 +▁Índice 1 +▁transformar 1 +/2008/ 1 +▁Sáhara 1 +▁bancaria 1 +▁Nunca 1 +▁80% 1 +▁mediados 1 +▁sustituir 1 +▁usado 1 +à 1 +▁UNMIK 1 +▁respaldar 1 +▁autoriza 1 +rey 1 +▁directores 1 +▁personalidad 1 +▁bloque 1 +▁incorporado 1 +▁Preparatorio 1 +▁ecológica 1 +ende 1 +▁altamente 1 +▁Tur 1 +▁accidente 1 +▁formularon 1 +▁maravilloso 1 +▁virtual 1 +▁productiva 1 +▁basta 1 +▁estimula 1 +Í 1 +▁Inspección 1 +▁francesa 1 +▁incompatible 1 +▁privilegio 1 +▁vivía 1 +▁procurar 1 +▁iraquí 1 +▁400 1 +▁Cualquier 1 +▁excepciones 1 +ambi 1 +▁catástrofe 1 +▁Guerra 1 +▁disparidad 1 +▁suele 1 +▁continuará 1 +▁presupuestaria 1 +IDA 1 +▁plantilla 1 +▁acordó 1 +ME 1 +▁tradiciones 1 +▁instó 1 +WP 1 +▁poca 1 +▁afectado 1 +▁importaciones 1 +▁acusa 1 +▁Existe 1 +▁distribuye 1 +▁cuantía 1 +▁multa 1 +▁preparatorio 1 +▁Planificación 1 +▁concienciación 1 +▁automóvil 1 +▁construido 1 +Mi 1 +▁centrarse 1 +▁Sub 1 +▁respeten 1 +Com 1 +▁protegida 1 +▁respira 1 +centr 1 +vía 1 +ib 1 +С 1 +▁Habitaciones 1 +▁Mejor 1 +▁diría 1 +▁introducido 1 +▁podremos 1 +▁mitigar 1 +▁morir 1 +gráfico 1 +▁PRO 1 +▁Tri 1 +▁añadido 1 +▁exhaustiva 1 +▁pasajeros 1 +▁irregular 1 +▁reduce 1 +▁policiales 1 +▁inestabilidad 1 +▁inseguridad 1 +▁propicio 1 +▁conservar 1 +▁vienen 1 +▁reviste 1 +▁funcional 1 +▁hablado 1 +▁cuestiona 1 +▁Cal 1 +▁generalizada 1 +▁Montreal 1 +▁jueves 1 +▁hermoso 1 +▁porteador 1 +▁Auditores 1 +ence 1 +▁recopilación 1 +▁protege 1 +▁viaja 1 +▁acordar 1 +▁Natural 1 +▁fundamenta 1 +▁puente 
1 +▁anfitrión 1 +▁dictamen 1 +▁ejecuciones 1 +▁estupefacientes 1 +о 1 +▁brazo 1 +ive 1 +▁placer 1 +▁contamina 1 +▁automáticamente 1 +▁otorgar 1 +▁separada 1 +▁perfecto 1 +▁remitir 1 +▁implanta 1 +▁dificultad 1 +▁decidida 1 +OC 1 +DO 1 +▁quedó 1 +▁detección 1 +▁Mediterráneo 1 +▁emprendido 1 +▁invernadero 1 +▁produjo 1 +▁Existen 1 +▁étnica 1 +▁armonizar 1 +▁señalaron 1 +▁urbana 1 +▁degradación 1 +▁museo 1 +▁asesores 1 +activ 1 +▁literalmente 1 +▁incertidumbre 1 +tendiendo 1 +212) 1 +▁dispara 1 +▁30% 1 +▁manipula 1 +▁Paraguay 1 +▁poderoso 1 +▁efectuado 1 +▁mantienen 1 +▁imagin 1 +▁nombrar 1 +▁CNUDMI 1 +▁cotidiana 1 +▁refugio 1 +▁963- 1 +▁Armas 1 +▁preserva 1 +▁sentar 1 +▁titulares 1 +zz 1 +▁resulte 1 +▁Creemos 1 +▁preocupante 1 +.400 1 +▁profundidad 1 +▁bloqueo 1 +& 1 +▁verificar 1 +EL 1 +▁crecer 1 +▁PYME 1 +▁socioeconómica 1 +▁administradora 1 +▁celebraron 1 +To 1 ++ 1 +▁Jurídica 1 +▁hermosa 1 +▁famoso 1 +▁actualizado 1 +▁inadmisible 1 +▁marítima 1 +▁resuelto 1 +▁semejante 1 +TRA 1 +ducir 1 +▁debilita 1 +berg 1 +ä 1 +▁intensa 1 +htm 1 +▁incrementa 1 +▁Alimentación 1 +▁desayuno 1 +▁eléctrica 1 +▁puntual 1 +▁forzoso 1 +▁pensión 1 +▁archivo 1 +▁globales 1 +▁arbitraje 1 +▁tendremos 1 +▁excepto 1 +▁club 1 +▁habilidades 1 +▁Auditor 1 +▁Emiratos 1 +▁afuera 1 +▁montón 1 +▁complementar 1 +▁conocí 1 +ísima 1 +0/ 1 +▁esperaba 1 +▁reembolso 1 +▁convirtió 1 +▁emplear 1 +COP 1 +▁Mongolia 1 +▁discapacidades 1 +▁Global 1 +▁Sabemos 1 +▁seriamente 1 +▁contratista 1 +▁Nu 1 +▁conduce 1 +▁excede 1 +▁prever 1 +▁llevará 1 +▁solía 1 +▁Lamentablemente 1 +▁aeronave 1 +▁navegación 1 +▁” 1 +▁Voy 1 +▁gravemente 1 +▁Bahrein 1 +▁Escuela 1 +▁sucedió 1 +▁soporta 1 +pie 1 +PR 1 +▁ayudará 1 +▁auxiliar 1 +▁entablar 1 +▁piscina 1 +▁misiles 1 +icidad 1 +▁envió 1 +▁poniendo 1 +▁móvil 1 +▁explorar 1 +▁Seguimiento 1 +▁damos 1 +▁Vamos 1 +▁negar 1 +▁artificial 1 +▁Monetario 1 +▁LOS 1 +▁editor 1 +▁asociado 1 +▁clínica 1 +▁continuidad 1 +▁actúe 1 +2009 1 +.300 1 +▁dotación 1 +▁Djibouti 1 +▁FMAM 1 +▁saludable 1 +▁flota 1 +PT 1 +▁cable 1 +▁frustra 1 +reestructuración 1 +▁minera 1 +▁ropa 1 +▁73 1 +▁unilaterales 1 +▁nutri 1 +▁mencionada 1 +▁suprimir 1 +▁aprendido 1 +▁ubicado 1 +▁agradable 1 +▁enemigo 1 +▁repercusión 1 +mbe 1 +tienda 1 +▁naturalmente 1 +anti 1 +▁fijar 1 +uelta 1 +▁desplegado 1 +▁recogida 1 +▁lleno 1 +▁discrimina 1 +▁nutrición 1 +▁afirmación 1 +▁contractual 1 +▁pintura 1 +▁hielo 1 +▁taller 1 +▁cine 1 +▁maestro 1 +▁tuvimos 1 +▁eventual 1 +▁lecciones 1 +▁evita 1 +▁presupuest 1 +▁coincide 1 +▁Fiduciario 1 +▁Mecanismo 1 +▁Ribera 1 +▁conmigo 1 +▁mueve 1 +▁electo 1 +[ 1 +▁recibida 1 +▁costumbre 1 +▁dominio 1 +▁empeora 1 +▁monetaria 1 +▁objetiva 1 +▁conversación 1 +▁origina 1 +▁disminuye 1 +▁supervisa 1 +▁Fiscalización 1 +▁bosque 1 +▁dólar 1 +▁conocen 1 +▁musulmanes 1 +▁Nuevo 1 +▁Somos 1 +▁obligada 1 +▁implementa 1 +▁circula 1 +▁pobre 1 +▁bacteria 1 +▁discapacitados 1 +▁Decisión 1 +▁edición 1 +dal 1 +▁distinto 1 +▁alentador 1 +▁encontrará 1 +▁talibanes 1 +▁vacío 1 +▁evalu 1 +▁doy 1 +▁ronda 1 +▁patrocinado 1 +▁bienal 1 +▁dieta 1 +▁colores 1 +▁vela 1 +▁doctor 1 +.600 1 +▁doce 1 +mail 1 +▁vigila 1 +▁forestal 1 +▁Proceso 1 +▁Consciente 1 +▁Ésta 1 +▁móviles 1 +▁mantenerse 1 +▁rendir 1 +▁canales 1 +▁registrar 1 +▁actualizar 1 +▁transfronteriza 1 +▁oralmente 1 +▁noticia 1 +güe 1 +vision 1 +▁Madagascar 1 +▁panorama 1 +▁importe 1 +▁infracciones 1 +▁censo 1 +▁empleadores 1 +▁coordinadores 1 +▁Financiero 1 +▁listo 1 +▁concentración 1 +▁atraer 1 +▁secc 1 +ney 1 +▁precisión 1 +▁Transporte 1 +▁Muy 1 +▁exploración 1 +▁Cuarta 1 +▁reflejar 1 +▁rojo 
1 +▁enjuicia 1 +▁facilitación 1 +▁Ejército 1 +▁auténtica 1 +▁patrones 1 +▁llevada 1 +▁beneficiarse 1 +▁relaja 1 +NEPAD 1 +▁lesiones 1 +▁calor 1 +▁pronta 1 +▁Organizada 1 +▁pagado 1 +▁municipal 1 +▁electorales 1 +front 1 +▁adjunto 1 +▁umbral 1 +▁Abu 1 +▁cuándo 1 +▁Plaza 1 +▁Ze 1 +posiciones 1 +▁estructural 1 +▁planteadas 1 +▁reducida 1 +▁compartido 1 +2010 1 +▁Cabo 1 +▁aportación 1 +▁imputa 1 +▁sujeto 1 +▁transmite 1 +▁internet 1 +▁ordenador 1 +president 1 +▁minuto 1 +▁Copenhague 1 +▁alivio 1 +▁infecta 1 +kh 1 +uela 1 +▁preguntó 1 +▁sinergia 1 +▁wi 1 +▁alegaciones 1 +pusiera 1 +▁adjunta 1 +▁resultó 1 +▁basándose 1 +▁orientar 1 +▁aplicará 1 +▁correctamente 1 +/9 1 +▁pendiente 1 +▁reacciona 1 +▁validez 1 +▁ganancias 1 +▁confidencial 1 +▁usamos 1 +▁originales 1 +▁perpetrado 1 +▁recicla 1 +▁visible 1 +▁basarse 1 +patria 1 +▁comprador 1 +IR 1 +▁neutral 1 +▁habiendo 1 +▁ocupante 1 +▁acusaciones 1 +▁intentando 1 +▁obviamente 1 +▁preámbulo 1 +▁rechazo 1 +▁facilidad 1 +▁respondió 1 +▁emisión 1 +▁distinta 1 +▁disminuir 1 +▁hermana 1 +▁jurídicamente 1 +▁agradece 1 +▁falla 1 +▁deseamos 1 +▁Personas 1 +▁tragedia 1 +▁símbolo 1 +▁Aún 1 +▁aborto 1 +▁Casi 1 +▁Spa 1 +▁instalar 1 +▁antipersonal 1 +▁heridos 1 +rito 1 +▁triste 1 +▁consideraba 1 +▁liberal 1 +decisión 1 +▁permanece 1 +▁desgracia 1 +▁Atómica 1 +▁háb 1 +▁complicado 1 +.800 1 +▁malo 1 +▁desplaza 1 +▁impresionante 1 +▁alianza 1 +▁mío 1 +tente 1 +▁lanzar 1 +▁Situación 1 +▁droga 1 +▁parti 1 +▁United 1 +▁prórroga 1 +▁ofrecido 1 +▁emitido 1 +▁USD 1 +ding 1 +▁1967 1 +▁pista 1 +ç 1 +▁Johannesburgo 1 +▁macroeconómica 1 +▁transcurrido 1 +HRC 1 +▁neto 1 +▁40% 1 +▁observancia 1 +mental 1 +▁enfrentamiento 1 +CIA 1 +▁desigualdades 1 +▁secundario 1 +▁TNP 1 +▁Kong 1 +▁venir 1 +▁ruido 1 +▁sustenta 1 +▁abastecimiento 1 +▁circun 1 +▁maravillosa 1 +▁matricul 1 +▁Hong 1 +▁cubierta 1 +lement 1 +▁miércoles 1 +▁insiste 1 +cualesquiera 1 +▁indígena 1 +▁saca 1 +▁administrar 1 +▁olvida 1 +▁empezamos 1 +▁Parece 1 +▁restantes 1 +▁daña 1 +▁descubrimiento 1 +▁forzado 1 +Quién 1 +III 1 +▁Público 1 +▁clausura 1 +▁simultánea 1 +▁reunido 1 +▁caída 1 +▁minoría 1 +▁emocional 1 +CRP 1 +▁conciliación 1 +▁mediación 1 +▁vele 1 +▁Resumen 1 +▁derivadas 1 +Esta 1 +▁US 1 +▁amparo 1 +prime 1 +▁ocurrir 1 +▁preparada 1 +▁Preocupa 1 +▁refiero 1 +▁Trata 1 +/15 1 +▁custodia 1 +▁relevante 1 +▁incluirá 1 +▁constata 1 +▁escena 1 +▁necesite 1 +▁jamás 1 +▁Puesto 1 +▁elegante 1 +▁exposiciones 1 +▁negativo 1 +▁enfoca 1 +▁mostrarles 1 +▁negra 1 +▁asequible 1 +FCCC 1 +▁rango 1 +▁sujeción 1 +▁forestales 1 +▁aspiraciones 1 +▁obtención 1 +▁tenor 1 +▁africano 1 +▁propaga 1 +▁$ 1 +▁experimentado 1 +CEDAW 1 +▁equilibrada 1 +▁proclama 1 +▁hostilidades 1 +▁certificación 1 +▁iv 1 +▁acogió 1 +▁Militar 1 +▁ultra 1 +▁mitigación 1 +▁afgano 1 +▁volvió 1 +▁interpretar 1 +▁Financiera 1 +▁Meridional 1 +▁concluyó 1 +▁estudiante 1 +▁Vigilancia 1 +▁auspicios 1 +▁obtenida 1 +▁secuencia 1 +▁UNOPS 1 +▁David 1 +▁consultores 1 +▁innovadoras 1 +▁viento 1 +▁compartida 1 +▁vieron 1 +scentralización 1 +▁estimular 1 +▁sustancia 1 +▁Bel 1 +▁identificado 1 +EX 1 +▁profundiza 1 +▁Kha 1 +▁previamente 1 +▁preparatoria 1 +▁estimación 1 +logía 1 +▁académica 1 +▁directriz 1 +▁conviene 1 +▁retiro 1 +▁obligar 1 +▁Togo 1 +▁aproxima 1 +▁acuífero 1 +.100 1 +▁XX 1 +▁repente 1 +▁nace 1 +▁defensor 1 +▁Nadie 1 +▁inflación 1 +▁TIC 1 +▁admitir 1 +▁afecte 1 +▁desempeñado 1 +▁120 1 +▁tomadas 1 +▁coordinador 1 +▁reconozca 1 +▁Principales 1 +▁tensiones 1 +▁Marino 1 +▁aviso 1 +▁st 1 +▁Elección 1 +▁Queremos 1 +▁elevar 1 +▁clasificación 1 +▁peligroso 1 +lah 1 
+▁quinta 1 +▁esencialmente 1 +XV 1 +▁sirio 1 +▁ciertamente 1 +▁requerido 1 +▁surge 1 +3.000 1 +▁Comunicación 1 +▁Tampoco 1 +▁reflexión 1 +▁Operación 1 +▁viabilidad 1 +igi 1 +▁provocado 1 +pondría 1 +▁prorrogar 1 +▁pierde 1 +▁recopila 1 +▁prestará 1 +▁candidato 1 +▁cuente 1 +▁averiguar 1 +2008 1 +▁+ 1 +▁alcohol 1 +▁destruir 1 +▁seguida 1 +▁cobra 1 +▁cola 1 +ICEF 1 +▁excesivo 1 +▁vivimos 1 +▁incaut 1 +▁creatividad 1 +sobre 1 +▁equivale 1 +▁elaborando 1 +▁batalla 1 +▁anunció 1 +▁logística 1 +▁ofrezca 1 +▁Todavía 1 +▁constituido 1 +chu 1 +▁enérgicamente 1 +▁llevaron 1 +▁perro 1 +▁Consideramos 1 +▁Mundo 1 +ándola 1 +▁Park 1 +▁oír 1 +tif 1 +▁pidieron 1 +▁seguimos 1 +Cuándo 1 +▁vendedor 1 +▁compila 1 +▁actualiza 1 +▁individuo 1 +▁Sírva 1 +▁Estocolmo 1 +▁patrón 1 +▁despert 1 +propia 1 +▁contenida 1 +▁sistemático 1 +▁transferir 1 +▁objeción 1 +▁pasaporte 1 +▁biológico 1 +▁delegados 1 +▁"¡ 1 +▁especializado 1 +▁ozono 1 +▁impreso 1 +▁comodidad 1 +▁inicialmente 1 +▁genes 1 +▁propagación 1 +▁She 1 +▁manifestado 1 +▁referirse 1 +cier 1 +▁percepción 1 +▁trabajadoras 1 +▁dificulta 1 +▁inventa 1 +▁Pregunta 1 +▁desapariciones 1 +▁solicitó 1 +▁sospecha 1 +▁Transnacional 1 +▁innovadores 1 +▁2011 1 +▁reunirse 1 +▁incidente 1 +▁cautela 1 +▁domicilio 1 +▁asociadas 1 +▁modifica 1 +▁recluta 1 +▁candidatura 1 +▁equipada 1 +▁imprescindible 1 +▁universitario 1 +▁renovación 1 +▁conlleva 1 +▁votantes 1 +▁72 1 +▁implementación 1 +▁proporcionando 1 +▁subregión 1 +▁Productos 1 +▁ascenso 1 +▁parto 1 +▁extraña 1 +▁Windows 1 +▁empezando 1 +▁Comisiones 1 +▁previsible 1 +▁estrecho 1 +▁prorrog 1 +▁1986 1 +view 1 +▁poderosa 1 +▁localidad 1 +▁interroga 1 +▁pedimos 1 +ić 1 +▁electro 1 +▁90% 1 +▁discusión 1 +▁codifica 1 +▁planea 1 +clav 1 +▁envejecimiento 1 +▁informática 1 +▁ejecutiva 1 +sistió 1 +presupuestarios 1 +▁Industrial 1 +▁cárceles 1 +▁convierta 1 +▁transacción 1 +▁alemán 1 +▁circunstancia 1 +▁civilizaciones 1 +▁dedicación 1 +▁100% 1 +▁medianas 1 +▁veía 1 +▁peores 1 +▁acusación 1 +▁Principal 1 +▁preliminares 1 +▁unanimidad 1 +▁Alineados 1 +▁gigante 1 +▁favorecer 1 +▁responda 1 +▁borra 1 +▁Report 1 +▁oración 1 +▁manteniendo 1 +▁desaparece 1 +▁rectores 1 +▁colocar 1 +▁gusto 1 +▁acoso 1 +▁falsa 1 +▁acondicionado 1 +▁oficioso 1 +▁sufrir 1 +▁desaparecido 1 +▁estableciendo 1 +▁singular 1 +▁Organizaciones 1 +▁becas 1 +Sub 1 +▁parcialmente 1 +ortalecimiento 1 +▁bancario 1 +▁limpieza 1 +▁Barbados 1 +▁fruto 1 +▁querido 1 +▁padece 1 +▁econom 1 +▁Botswana 1 +▁Género 1 +▁adherirse 1 +▁disminuido 1 +▁Conforme 1 +▁guardia 1 +▁techo 1 +▁cobrar 1 +▁verifica 1 +▁alfabetización 1 +▁plástico 1 +▁repetir 1 +▁escond 1 +poniendo 1 +▁boca 1 +▁interlocutores 1 +,000 1 +estructura 1 +▁factura 1 +▁proviene 1 +▁Fiji 1 +▁pacífico 1 +▁corrección 1 +▁Ud 1 +dista 1 +▁aptitudes 1 +▁jardín 1 +е 1 +▁entrenamiento 1 +▁prestó 1 +▁mirando 1 +▁conocemos 1 +scripciones 1 +▁defecto 1 +▁colaboradores 1 +▁blog 1 +más 1 +▁Comunicaciones 1 +▁soporte 1 +▁California 1 +▁Climático 1 +▁Empleo 1 +▁calificado 1 +▁consistente 1 +▁salón 1 +▁consultor 1 +▁desequilibrio 1 +▁Malawi 1 +▁teatro 1 +▁horario 1 +▁espiritual 1 +▁inaugura 1 +▁pandemia 1 +▁reinserción 1 +▁OSCE 1 +▁extender 1 +▁balance 1 +▁activista 1 +▁aboga 1 +▁desaparición 1 +▁prisiones 1 +▁interactivo 1 +▁Trabaja 1 +▁notificar 1 +▁atrapa 1 +estabiliza 1 +▁OOPS 1 +ffe 1 +▁intimida 1 +militar 1 +▁proveedor 1 +▁felicidad 1 +▁fiesta 1 +ibíd 1 +▁Científico 1 +▁aliviar 1 +▁conmemora 1 +▁FMI 1 +▁Administrativo 1 +▁lento 1 +▁cena 1 +▁Anti 1 +▁Ultraterrestre 1 +▁encontré 1 +▁reducciones 1 +▁amable 1 +▁formule 1 
+▁configuración 1 +▁trasladar 1 +▁traducir 1 +▁doméstico 1 +▁prejuicio 1 +▁Iglesia 1 +▁proporcionó 1 +▁influir 1 +▁inscrito 1 +scendencia 1 +▁Éste 1 +▁traer 1 +▁confía 1 +▁tramita 1 +▁traza 1 +▁pieza 1 +▁encarar 1 +▁Capacitación 1 +▁Finanzas 1 +▁estabilización 1 +▁Frente 1 +▁contaminantes 1 +▁cuesta 1 +▁perfecta 1 +▁tramitación 1 +▁encaminada 1 +▁célula 1 +▁insistir 1 +Saben 1 +▁Mauritania 1 +▁molécula 1 +▁genital 1 +▁considerará 1 +Puede 1 +▁Directrices 1 +▁Recomendaciones 1 +▁musical 1 +▁tabaco 1 +▁UNITA 1 +▁conectar 1 +▁imparcialidad 1 +▁competitiva 1 +▁cometa 1 +▁56/2 1 +▁pedía 1 +▁Doy 1 +▁innovador 1 +▁Atlántico 1 +▁cerebral 1 +▁viviendo 1 +▁Empresa 1 +▁marido 1 +▁reanudar 1 +▁autorizar 1 +▁Balcanes 1 +▁cambió 1 +▁Tanto 1 +▁sentía 1 +▁controlado 1 +Música 1 +▁fronterizo 1 +▁terraza 1 +▁infección 1 +▁sectoriales 1 +▁cruce 1 +▁turco 1 +▁decidí 1 +▁Grandes 1 +▁procedente 1 +▁modern 1 +trop 1 +▁equilibrado 1 +▁aceite 1 +▁Dentro 1 +ística 1 +ão 1 +▁multiplica 1 +▁prohibida 1 +▁confisca 1 +▁explora 1 +▁hídricos 1 +▁inalienable 1 +▁trámite 1 +и 1 +▁conexiones 1 +▁estrés 1 +▁José 1 +▁franco 1 +▁actuando 1 +▁armonía 1 +▁Consenso 1 +▁compasión 1 +▁albergue 1 +▁discriminatorio 1 +▁simula 1 +▁à 1 +▁declarada 1 +▁Asentamientos 1 +▁arbitral 1 +▁designar 1 +▁bolsa 1 +CIÓN 1 +▁redacta 1 +▁auténtico 1 +▁descubierto 1 +▁desplegar 1 +▁válida 1 +▁ejecutado 1 +stitucion 1 +▁coloniales 1 +▁concilia 1 +▁testigo 1 +▁química 1 +▁violenta 1 +▁Pienso 1 +▁blanca 1 +▁inherente 1 +▁parecido 1 +▁elabore 1 +▁criatura 1 +▁obedece 1 +▁golpea 1 +▁fundada 1 +▁martes 1 +▁empezado 1 +▁estereotipos 1 +▁intersectorial 1 +▁lluvia 1 +▁violentos 1 +▁sobrevivir 1 +▁caliente 1 +▁redactar 1 +▁fila 1 +▁pura 1 +▁gestiones 1 +eciendo 1 +▁infracción 1 +▁ambicioso 1 +▁manejo 1 +▁acorde 1 +▁` 1 +▁literal 1 +▁versiones 1 +▁árbol 1 +▁hidro 1 +▁comprendida 1 +▁pudieron 1 +▁expres 1 +credit 1 +▁dinámico 1 +▁excluir 1 +▁Espacial 1 +▁Auto 1 +▁pierna 1 +▁decenas 1 +▁tensión 1 +▁manifestó 1 +▁proveniente 1 +▁reiteró 1 +▁subsistencia 1 +▁telefónica 1 +▁variable 1 +▁70% 1 +ident 1 +▁tutor 1 +▁Tercer 1 +▁Níger 1 +▁instituto 1 +▁lingüística 1 +▁satisfecho 1 +▁soberano 1 +▁referido 1 +▁colonial 1 +▁detecta 1 +▁logístico 1 +▁Libro 1 +▁digna 1 +.900 1 +Observador 1 +▁contenía 1 +▁homenaje 1 +▁rebeldes 1 +а 1 +▁perturba 1 +▁carbón 1 +▁niega 1 +▁1982 1 +▁concentrar 1 +▁gestiona 1 +▁italiano 1 +▁Electoral 1 +▁preparó 1 +▁was 1 +▁jornada 1 +# 1 +▁judío 1 +▁Club 1 +▁desglosados 1 +▁Poli 1 +▁trabajador 1 +gura 1 +▁reconociendo 1 +▁restricción 1 +▁refería 1 +▁ocupe 1 +▁pensado 1 +▁ratifica 1 +▁satisfactorio 1 +▁compensación 1 +▁Franja 1 +▁vieja 1 +▁paralela 1 +▁critica 1 +▁beneficiar 1 +▁Africano 1 +▁2020 1 +▁Basilea 1 +▁recíproca 1 +▁precedente 1 +▁campamento 1 +▁bloquea 1 +▁malaria 1 +▁lucro 1 +▁Rights 1 +▁1949 1 +▁explícitamente 1 +▁consagrados 1 +▁aceptada 1 +párrafo 1 +▁doctrina 1 +▁refuerza 1 +▁milenio 1 +▁emigr 1 +▁combatientes 1 +▁elaboró 1 +▁cosecha 1 +▁consigue 1 +▁refugiado 1 +▁creía 1 +▁Utilización 1 +▁informal 1 +▁armoniza 1 +▁legalidad 1 +▁apéndice 1 +▁privacidad 1 +▁Nations 1 +▁tangible 1 +▁liberar 1 +▁constituida 1 +▁habilita 1 +▁fal 1 +▁guarde 1 +▁aprende 1 +▁recae 1 +à 1 +▁cercana 1 +▁excluye 1 +▁intensifique 1 +▁George 1 +▁obvio 1 +▁autónoma 1 +▁explicó 1 +restablecimiento 1 +▁religioso 1 +▁1987 1 +▁talento 1 +▁Bush 1 +▁Trinidad 1 +▁convicción 1 +▁Permítanme 1 +▁espectacular 1 +▁incapacidad 1 +▁Formación 1 +▁racista 1 +▁signo 1 +▁auditor 1 +▁sirva 1 +▁visitantes 1 +▁helicóptero 1 +▁novedades 1 +▁recomendada 1 +▁empresarios 1 +▁caza 1 
+▁adelanta 1 +Bueno 1 +▁afronta 1 +▁temporada 1 +LO 1 +/1999/ 1 +▁fax 1 +▁Presentación 1 +▁mutilación 1 +▁tuberculosis 1 +▁nervio 1 +▁plural 1 +▁borde 1 +▁subir 1 +▁lanzado 1 +É 1 +▁Bolivarian 1 +▁Secretaria 1 +▁ecológico 1 +▁Seminario 1 +▁arresto 1 +▁61/2 1 +▁rostro 1 +▁justificación 1 +1998 1 +▁DERECHO 1 +▁Mónaco 1 +▁trimestre 1 +▁Tareas 1 +óxido 1 +▁presiones 1 +▁definiciones 1 +▁creativa 1 +▁Kirguistán 1 +▁exhaustivo 1 +▁Reserva 1 +▁monitor 1 +▁atractivo 1 +▁Cuarto 1 +▁arroja 1 +▁reorganiza 1 +▁dispositiva 1 +▁huérfanos 1 +▁sudoriental 1 +▁pesquero 1 +▁from 1 +▁modificada 1 +▁Junto 1 +▁creativo 1 +disciplina 1 +▁Premio 1 +▁cuidadosamente 1 +script 1 +▁característica 1 +▁tolera 1 +▁biodiversidad 1 +▁frágil 1 +▁islámico 1 +▁terminó 1 +▁benefici 1 +▁admisión 1 +▁coral 1 +tendrá 1 +4.000 1 +▁Conclusiones 1 +▁aislamiento 1 +▁bombarde 1 +▁continental 1 +▁psicológico 1 +▁Racismo 1 +▁sindical 1 +▁1985 1 +▁vulnerable 1 +▁OTAN 1 +▁logró 1 +▁(2008) 1 +▁extrajudiciales 1 +▁OCDE 1 +▁ilumina 1 +▁convencer 1 +▁aviación 1 +ándonos 1 +▁escribió 1 +▁60% 1 +▁1970 1 +▁59/2 1 +▁sucedido 1 +▁exceso 1 +▁brutal 1 +▁lagunas 1 +▁contenga 1 +▁válido 1 +▁direcciones 1 +▁heridas 1 +▁25% 1 +▁descolonización 1 +▁siguieron 1 +▁desprende 1 +▁suspender 1 +▁kilo 1 +▁biología 1 +cora 1 +▁coordina 1 +Video 1 +▁Diálogo 1 +▁conveniencia 1 +▁XVII 1 +▁Libre 1 +▁arregl 1 +▁compone 1 +▁extraer 1 +▁bicicleta 1 +▁determinó 1 +▁columna 1 +▁culmina 1 +▁dulce 1 +▁Condición 1 +▁modernización 1 +▁pretend 1 +▁robo 1 +▁azúcar 1 +▁hipótesis 1 +▁Excmo 1 +▁Mixto 1 +▁serbios 1 +▁rueda 1 +▁nube 1 +▁confirmó 1 +▁emociones 1 +▁deficiente 1 +▁concedido 1 +▁alentó 1 +▁ancianos 1 +ICA 1 +▁inversores 1 +▁divisiones 1 +▁imperativo 1 +INF 1 +▁Martin 1 +▁mantuvo 1 +▁refleje 1 +▁creciendo 1 +▁extensa 1 +▁fijo 1 +dependientes 1 +▁Guyana 1 +▁Técnico 1 +▁cerrada 1 +▁menoscaba 1 +▁Tabago 1 +▁determine 1 +▁discutir 1 +▁cuidar 1 +▁prohib 1 +▁incluyó 1 +▁enmendada 1 +▁emitir 1 +▁intenciones 1 +▁relatores 1 +▁Enviado 1 +▁regalo 1 +▁caracter 1 +▁Gabón 1 +▁redunda 1 +▁agotado 1 +▁aplazar 1 +dministraciones 1 +▁carencia 1 +▁Bhután 1 +▁algoritmo 1 +▁higiene 1 +▁folleto 1 +▁concierne 1 +▁resolv 1 +CEDEAO 1 +▁Corrupción 1 +▁proteína 1 +▁migratoria 1 +▁capacitado 1 +▁atribuciones 1 +▁genético 1 +work 1 +▁Liberación 1 +▁concerniente 1 +▁secuestra 1 +▁impugna 1 +▁Trato 1 +▁panel 1 +▁National 1 +▁prisioneros 1 +▁referéndum 1 +▁transcurso 1 +▁amistad 1 +▁firmó 1 +▁abandono 1 +▁piensen 1 +▁recibí 1 +▁devolver 1 +verdad 1 +▁reprimir 1 +▁sentimos 1 +▁digno 1 +▁surgido 1 +▁confiar 1 +tendida 1 +▁cubre 1 +▁falsifica 1 +▁adición 1 +▁2012 1 +▁ministerio 1 +▁recompensa 1 +▁ingeni 1 +▁jubilación 1 +6.000 1 +▁Adelanto 1 +▁OSACT 1 +▁Turkmenistán 1 +▁idéntica 1 +▁famosa 1 +▁57/2 1 +▁paralelo 1 +Ó 1 +▁apoyó 1 +▁cápita 1 +▁inspectores 1 +▁indique 1 +▁cubano 1 +▁haciéndo 1 +® 1 +▁estudie 1 +▁fruta 1 +▁proteja 1 +▁Reforma 1 +▁agujero 1 +▁constituya 1 +▁diligencia 1 +▁Golán 1 +▁severa 1 +eira 1 +▁prepare 1 +▁Votos 1 +▁llamo 1 +▁Zona 1 +▁recurre 1 +▁prosiga 1 +▁Villa 1 +▁botón 1 +▁XI 1 +▁detenciones 1 +▁comporta 1 +н 1 +▁fronteriza 1 +▁pregunto 1 +▁profesión 1 +▁fichero 1 +▁primavera 1 +▁reanudación 1 +▁ONUDD 1 +▁demasiada 1 +▁entendido 1 +▁perpetua 1 +place 1 +▁indebida 1 +▁Armadas 1 +▁planificar 1 +▁Maldivas 1 +▁consuetudinario 1 +▁conozca 1 +▁ensayo 1 +▁acepte 1 +▁celular 1 +▁sueldo 1 +▁articula 1 +ñi 1 +▁UNIFEM 1 +▁Woods 1 +▁exclusivo 1 +Qaida 1 +▁intenso 1 +▁Robert 1 +▁necesitará 1 +т 1 +▁encontró 1 +▁espectro 1 +▁Ayuda 1 +▁gratis 1 +▁sugerencia 1 +▁celda 1 +▁alberga 1 +▁partículas 
1 +▁reasentamiento 1 +▁subyacente 1 +▁varones 1 +▁fracasa 1 +ward 1 +▁Reducción 1 +▁operadores 1 +▁mostró 1 +▁convencida 1 +▁Andorra 1 +▁cumpliendo 1 +▁divide 1 +▁sincero 1 +▁dictado 1 +▁romper 1 +▁Habla 1 +▁divulga 1 +▁Afirma 1 +▁CEPA 1 +▁exhib 1 +▁punta 1 +▁acced 1 +▁Bahamas 1 +▁desventaja 1 +▁exitosa 1 +▁permitiera 1 +▁recinto 1 +▁ingenieros 1 +▁emergentes 1 +▁ministro 1 +▁paludismo 1 +▁refirió 1 +▁ingres 1 +▁Señala 1 +▁Varias 1 +ógeno 1 +stituyó 1 +▁Número 1 +▁circuito 1 +▁terapia 1 +▁prelación 1 +proyecto 1 +▁(2007) 1 +▁Apelaciones 1 +▁Gobernador 1 +▁divertido 1 +▁insuficiencia 1 +▁prevalencia 1 +♪ 1 +pendiendo 1 +▁Samoa 1 +▁Administrador 1 +▁Facebook 1 +▁continuó 1 +▁fabricantes 1 +▁persiste 1 +▁favorito 1 +/2009/ 1 +▁efectividad 1 +▁maltrato 1 +▁barato 1 +▁donaciones 1 +▁dictar 1 +▁gradua 1 +▁OSE 1 +▁intimidación 1 +▁argumenta 1 +▁Antigua 1 +▁menú 1 +▁Islam 1 +▁autónomo 1 +▁comunicó 1 +▁temporario 1 +▁disponía 1 +▁semillas 1 +▁golf 1 +▁demográfica 1 +▁VIII 1 +▁salió 1 +áramos 1 +▁colabor 1 +▁Alimentos 1 +▁cirugía 1 +▁denegación 1 +▁turístico 1 +▁semestre 1 +▁tabla 1 +▁quedará 1 +▁Industria 1 +▁Juventud 1 +▁Occidente 1 +▁Técnica 1 +▁fortaleciendo 1 +▁obtuvo 1 +▁referirme 1 +▁milicias 1 +▁Verificación 1 +▁Objetivo 1 +▁Igualmente 1 +▁optimiza 1 +▁reproducción 1 +▁basura 1 +▁hostil 1 +▁contratante 1 +▁aborde 1 +▁adjudic 1 +▁emplazamiento 1 +▁sensibilizar 1 +▁sostener 1 +▁Museo 1 +▁Mayor 1 +▁agencia 1 +▁dibujo 1 +▁injerencia 1 +▁fumadores 1 +▁interfaz 1 +▁persistencia 1 +▁respete 1 +▁terror 1 +▁faculta 1 +▁Inglaterra 1 +▁Promover 1 +▁designación 1 +▁empoderamiento 1 +▁frío 1 +▁Recuerda 1 +▁55/1 1 +▁Ombudsman 1 +▁atribuye 1 +▁amiga 1 +▁exacta 1 +▁contribuyó 1 +▁femenino 1 +▁reafirmó 1 +▁rigurosa 1 +▁descanso 1 +▁destinatario 1 +▁permitía 1 +▁aventura 1 +▁Belice 1 +▁Africa 1 +▁Bretton 1 +▁Cuestión 1 +▁Suprema 1 +▁confortable 1 +▁interviene 1 +▁excluido 1 +Cuánto 1 +▁Instancia 1 +▁asume 1 +▁Diversidad 1 +▁colegio 1 +▁inundaciones 1 +▁Apelación 1 +▁raíces 1 +▁ultima 1 +▁demográfico 1 +▁Fecha 1 +▁reanuda 1 +▁felices 1 +▁mencionó 1 +▁Recientemente 1 +▁sucediendo 1 +▁Academia 1 +▁intermedio 1 +▁reseña 1 +▁aprendí 1 +▁pesado 1 +▁injusticia 1 +▁órbita 1 +▁alojado 1 +▁descenso 1 +▁informativo 1 +▁emocionante 1 +ísimo 1 +▁calentamiento 1 +▁Instituciones 1 +▁estigma 1 +▁Encargado 1 +▁Observadores 1 +▁contaba 1 +▁Socorro 1 +▁intérprete 1 +▁malnutrición 1 +▁geográfico 1 +convocadas 1 +▁falso 1 +▁desigual 1 +▁ferrocarril 1 +▁terremoto 1 +▁abstenciones 1 +▁Papua 1 +▁Omán 1 +▁atañe 1 +▁mantendrá 1 +▁convergencia 1 +▁reconstruir 1 +▁innovaciones 1 +▁limpio 1 +▁iraní 1 +▁alemana 1 +▁concertar 1 +▁antelación 1 +▁administradores 1 +document 1 +▁enmendar 1 +▁conexo 1 +Ú 1 +▁pertinencia 1 +▁misterio 1 +▁destruye 1 +▁comparable 1 +▁acontecimiento 1 +▁surgir 1 +▁dirigió 1 +▁conceptual 1 +▁France 1 +▁hueso 1 +▁china 1 +Está 1 +▁persecución 1 +▁ingresar 1 +▁mercurio 1 +▁recibiendo 1 +▁ofreciendo 1 +▁reduciendo 1 +▁voces 1 +▁variante 1 +▁tutela 1 +▁linea 1 +▁enviada 1 +/54/ 1 +▁facultativo 1 +▁2010-2011 1 +▁Ejecución 1 +▁problemática 1 +▁recogido 1 +▁visitó 1 +▁concertada 1 +▁turno 1 +comercialización 1 +▁2000-2001 1 +▁ayudó 1 +▁pregunté 1 +▁requerida 1 +▁calificación 1 +▁adherido 1 +▁Microsoft 1 +▁billones 1 +▁coincidi 1 +▁incorpore 1 +▁Región 1 +▁gesto 1 +▁preescolar 1 +▁tropical 1 +Гі 1 +▁reproduce 1 +▁cuota 1 +▁admisible 1 +▁unánime 1 +▁intensidad 1 +▁invierno 1 +▁Decisiones 1 +Sudáfrica 1 +▁recrea 1 +iéndole 1 +▁Turismo 1 +▁vulnera 1 +€ 1 +▁agradezco 1 +▁ceremonia 1 +▁estancamiento 1 +▁penetra 1 +▁prefiere 1 
+▁subsahariana 1 +▁ordinaria 1 +▁renovado 1 +▁aprovechamiento 1 +▁estableciera 1 +▁incitación 1 +▁masivo 1 +▁financiamiento 1 +interdependencia 1 +▁Descolonización 1 +▁sorpresa 1 +▁sostenida 1 +▁reputación 1 +wood 1 +▁vertical 1 +▁afro 1 +▁acero 1 +▁añade 1 +▁estrella 1 +▁Francisco 1 +▁inscribir 1 +▁potente 1 +▁jubila 1 +▁Análisis 1 +▁Gubernamentales 1 +▁murieron 1 +▁murió 1 +▁supuestamente 1 +▁expira 1 +▁instalado 1 +Quiere 1 +▁Amsterdam 1 +▁flagrante 1 +▁fútbol 1 +▁galaxia 1 +▁competitivo 1 +▁concertación 1 +▁expresiones 1 +▁efectuada 1 +▁Negocios 1 +▁coalición 1 +▁subsiguiente 1 +▁reclusión 1 +▁genoma 1 +▁obtenga 1 +▁tienden 1 +PNUD 1 +lógica 1 +XXI 1 +▁analítico 1 +▁mercenarios 1 +▁pertenencia 1 +▁ejerza 1 +▁Gui 1 +▁habló 1 +▁típico 1 +▁pasivo 1 +▁58/2 1 +▁Democracia 1 +▁Inversiones 1 +▁flagelo 1 +▁prestigio 1 +▁prorrateo 1 +▁Órgano 1 +▁abruma 1 +▁motivación 1 +▁victoria 1 +▁hábitat 1 +▁refuerce 1 +▁diseñada 1 +▁curva 1 +▁interrogatorio 1 +▁aplaud 1 +▁revoca 1 +▁préstamo 1 +▁Amnistía 1 +▁canadiense 1 +▁contemporánea 1 +▁desfavorecidos 1 +▁estímulo 1 +▁reúna 1 +▁exacto 1 +▁devastador 1 +▁gravado 1 +▁Biológica 1 +▁externo 1 +▁Development 1 +▁horrible 1 +▁progenitor 1 +▁Berlín 1 +▁comencé 1 +▁inmensa 1 +▁bordo 1 +▁desarrolle 1 +▁sucesivo 1 +▁batería 1 +▁oscura 1 +▁atribuir 1 +▁(1998) 1 +▁renovar 1 +▁recupera 1 +▁análoga 1 +▁espectáculo 1 +▁Linux 1 +▁infecciones 1 +▁disputa 1 +▁XIX 1 +Ley 1 +▁Biblioteca 1 +▁Lesotho 1 +▁democratización 1 +▁medición 1 +▁esposo 1 +▁revisa 1 +5-0 1 +> 1 +▁pabellón 1 +▁mantuvier 1 +▁luces 1 +▁Swazilandia 1 +▁Taiwán 1 +▁diversificación 1 +▁kuwaití 1 +▁alojarte 1 +▁matrícula 1 +▁refuerzo 1 +▁tranquila 1 +▁consolidado 1 +▁asistieron 1 +▁aislado 1 +▁recabar 1 +▁modalidad 1 +▁Aspectos 1 +▁embarca 1 +▁colección 1 +▁admira 1 +▁consigo 1 +▁Recomienda 1 +▁autobús 1 +▁complemento 1 +© 1 +▁énfasis 1 +▁llamó 1 +▁sensibilidad 1 +▁bandera 1 +▁valiente 1 +▁suave 1 +OPS 1 +▁Bangkok 1 +▁Distrito 1 +▁Prohibición 1 +▁contrarrestar 1 +▁fantástico 1 +▁produciendo 1 +gún 1 +▁investigador 1 +▁numer 1 +▁asegur 1 +▁Booking 1 +▁arquitecto 1 +▁británica 1 +▁liquidación 1 +▁sabiduría 1 +▁ánimo 1 +▁neuronas 1 +▁cristal 1 +cogemos 1 +▁Degradantes 1 +▁remesas 1 +▁Bienestar 1 +▁crónica 1 +▁deportivo 1 +Dónde 1 +▁Manual 1 +▁Estratégico 1 +▁síntomas 1 +▁típica 1 +▁plasma 1 +▁Adición 1 +▁destruido 1 +8.000 1 +▁Subcomité 1 +▁Suriname 1 +▁Simplemente 1 +▁juguete 1 +▁polvo 1 +▁comprobado 1 +▁juega 1 +▁hablo 1 +▁desempeñe 1 +▁billete 1 +▁estimó 1 +▁desierto 1 +▁Series 1 +pdf 1 +▁emprende 1 +▁Contratante 1 +▁abolición 1 +▁desconocido 1 +▁filosofía 1 +▁muchísimo 1 +▁prevalece 1 +▁recorrido 1 +▁alarmante 1 +▁revolucion 1 +▁dibuja 1 +▁preveía 1 +▁establecieron 1 +▁automática 1 +▁Océano 1 +▁credenciales 1 +▁litigio 1 +▁Respuesta 1 +▁requería 1 +▁diputado 1 +▁sentí 1 +▁Tokelau 1 +▁inquieta 1 +▁that 1 +▁selectiva 1 +/2002 1 +icultura 1 +▁diapositiva 1 +SIÓN 1 +▁Bosques 1 +▁ocurriendo 1 +▁permítanme 1 +▁centenar 1 +▁receta 1 +▁requiera 1 +▁delicado 1 +▁depositario 1 +▁film 1 +▁incondicional 1 +▁mercantil 1 +▁Bagdad 1 +▁decidieron 1 +▁colonia 1 +▁Comercial 1 +▁Judicial 1 +▁Profesional 1 +▁redujo 1 +▁trauma 1 +▁regulador 1 +▁cooperativas 1 +persona 1 +▁enérgica 1 +▁expedición 1 +▁revitalización 1 +с 1 +▁moratoria 1 +▁especula 1 +▁mamá 1 +▁Príncipe 1 +▁ascendía 1 +▁designada 1 +▁sexta 1 +▁vecino 1 +▁Reglamentación 1 +▁aparición 1 +▁atentamente 1 +▁reacciones 1 +▁Pesca 1 +▁adolescente 1 +▁magistrado 1 +▁fundador 1 +▁Barbuda 1 +▁distinguir 1 +▁drástica 1 +▁Página 1 +▁reivindica 1 +▁oscuro 1 +-2004 1 
+▁Probablemente 1 +▁Véanse 1 +▁Ofrece 1 +PRST 1 +▁suscrito 1 +▁invite 1 +▁salarial 1 +Informe 1 +▁trágico 1 +▁sitúa 1 +▁muestre 1 +▁revisiones 1 +▁Irak 1 +▁esquema 1 +▁agotan 1 +▁Resoluciones 1 +▁pretexto 1 +▁residual 1 +▁Palacio 1 +▁segmento 1 +▁adversa 1 +▁guiar 1 +▁reparar 1 +▁Móvil 1 +▁Treaty 1 +▁aconseja 1 +▁cuartel 1 +▁fortalezca 1 +▁jurisdicciones 1 +▁penitenciaria 1 +▁precursores 1 +▁abuelo 1 +▁romaní 1 +▁Crueles 1 +▁errónea 1 +▁longitud 1 +▁monumento 1 +▁griego 1 +▁portátil 1 +▁botella 1 +▁artística 1 +▁optimista 1 +▁Cairo 1 +▁Subprograma 1 +▁estuve 1 +▁esquina 1 +¡ 1 +▁ingrediente 1 +▁cuantifica 1 +▁elegida 1 +▁densidad 1 +▁periodista 1 +emisor 1 +▁participe 1 +▁gratitud 1 +▁pudiéramos 1 +▁temprano 1 +▁tsunami 1 +▁apasiona 1 +▁contrabando 1 +sgraciadamente 1 +▁vote 1 +ô 1 +▁Ambiental 1 +▁divisas 1 +▁intermediario 1 +▁competir 1 +▁elogia 1 +ómetro 1 +▁XII 1 +▁Logística 1 +▁reembolsa 1 +▁Mercado 1 +▁preventivo 1 +▁exención 1 +▁describ 1 +▁imparte 1 +▁delicada 1 +▁satisface 1 +▁Moscú 1 +▁decepciona 1 +▁sabido 1 +▁dictada 1 +▁norteamericano 1 +▁patrocina 1 +▁advertencia 1 +▁certeza 1 +▁restringir 1 +▁remedio 1 +▁Voluntarios 1 +▁fósiles 1 +▁oxígeno 1 +▁predecir 1 +▁somalí 1 +▁ideología 1 +▁espalda 1 +▁alcalde 1 +▁contraseña 1 +XXVI 1 +Demócrata 1 +▁vigencia 1 +▁alegría 1 +▁Frank 1 +▁camiones 1 +▁leído 1 +▁Haciend 1 +▁horizontal 1 +▁introducida 1 +▁líquido 1 +▁concebido 1 +▁absorbe 1 +▁decidimos 1 +Nueva 1 +▁acostumbra 1 +▁homicidio 1 +▁textil 1 +▁Siguiendo 1 +▁huelga 1 +▁Obama 1 +▁cruel 1 +▁audio 1 +.105/ 1 +▁Economía 1 +▁FNUAP 1 +▁MANUD 1 +▁diplomacia 1 +▁jardines 1 +▁Considero 1 +▁equipado 1 +▁centró 1 +▁contraídas 1 +▁alegra 1 +incluido 1 +▁envíe 1 +▁recuerde 1 +▁comience 1 +▁radiación 1 +▁aterriza 1 +▁Ambos 1 +▁FPNUL 1 +▁Subsecretario 1 +▁protagonista 1 +▁registró 1 +▁restrictiva 1 +▁Conjunto 1 +▁Estructura 1 +▁comprometida 1 +▁correspondía 1 +▁termine 1 +▁invisible 1 +▁otoño 1 +▁motivado 1 +▁explícita 1 +▁dormitorio 1 +▁entusiasmo 1 +▁ratifique 1 +▁pudiese 1 +▁filtra 1 +▁variaciones 1 +▁parezca 1 +Italia 1 +China 1 +▁tranquilo 1 +▁disparo 1 +▁vivido 1 +sexual 1 +▁Subdivisión 1 +▁artefactos 1 +▁libanés 1 +▁Contribuciones 1 +▁arriesga 1 +▁existiendo 1 +▁demostró 1 +▁enfermera 1 +▁Provincia 1 +▁tendiente 1 +▁multitud 1 +▁trienal 1 +▁Debian 1 +▁recorte 1 +scribió 1 +safortunadamente 1 +▁Magistrado 1 +sproporcionada 1 +▁concluya 1 +7.000 1 +▁pautas 1 +Canadá 1 +▁Senado 1 +métrica 1 +▁equipamiento 1 +▁ratificó 1 +▁abuela 1 +▁Ahí 1 +▁confiere 1 +gregación 1 +▁introduce 1 +▁marginados 1 +▁patrulla 1 +▁Apertura 1 +▁ocurra 1 +México 1 +▁Emergencia 1 +▁analfabetismo 1 +▁italiana 1 +confidencialidad 1 +▁intercambia 1 +▁Adelantados 1 +▁asombroso 1 +▁multianual 1 +▁aceptó 1 +▁alinea 1 +▁Coalición 1 +▁adquiere 1 +▁comprenda 1 +▁autopista 1 +▁contenedor 1 +CCPR 1 +▁Vivienda 1 +▁calificaciones 1 +▁creíble 1 +▁facilitó 1 +▁evacua 1 +▁pelea 1 +▁Cuarteto 1 +▁Tráfico 1 +▁excelencia 1 +▁ofreció 1 +▁fiabilidad 1 +▁dispensa 1 +▁excusa 1 +▁computador 1 +▁explosión 1 +▁demostración 1 +▁exitoso 1 +▁sugerir 1 +▁percibe 1 +▁ESPAÑOL 1 +▁arrecife 1 +▁fármaco 1 +▁redoblar 1 +▁asigne 1 +▁escritura 1 +▁aislada 1 +▁rescate 1 +demócrata 1 +▁desviación 1 +▁Annan 1 +▁Adicional 1 +▁desglosa 1 +▁Reconciliación 1 +▁demuestre 1 +▁exagera 1 +▁islámica 1 +▁mecánica 1 +▁eligió 1 +▁reloj 1 +▁suicida 1 +▁comenzando 1 +▁plurianual 1 +▁prerrogativa 1 +▁muebles 1 +▁rechazó 1 +▁turística 1 +▁Podrá 1 +▁cambie 1 +▁permitiendo 1 +▁diagnostic 1 +cogieron 1 +в 1 +▁Abstenciones 1 +▁Marshall 1 +▁clásico 1 +▁mérito 1 +▁acogido 1 +▁brote 
1 +evolucion 1 +▁convencional 1 +▁INGLÉS 1 +▁Solidaridad 1 +▁exhortó 1 +▁perseguir 1 +▁abrió 1 +▁inicie 1 +▁efectu 1 +▁innecesaria 1 +▁vectores 1 +▁nocivas 1 +▁cuenca 1 +▁marginal 1 +▁elemental 1 +▁susceptible 1 +▁Seguro 1 +▁arroz 1 +Alguien 1 +▁Recuerdo 1 +▁Richard 1 +▁Violencia 1 +▁piratería 1 +▁reproducir 1 +▁Michael 1 +▁gripe 1 +▁veinte 1 +▁esboza 1 +▁mortal 1 +privada 1 +▁ecosistema 1 +▁desempleados 1 +▁endeudados 1 +▁monopolio 1 +▁niñez 1 +▁percibir 1 +▁séptima 1 +▁alcanzó 1 +▁agresiones 1 +▁defiende 1 +▁inadecuada 1 +▁inmueble 1 +▁maniobra 1 +▁Gambia 1 +▁transferido 1 +▁Pleno 1 +Cuarta 1 +▁regir 1 +▁suplementaria 1 +▁vergüenza 1 +▁Piensen 1 +▁entendemos 1 +▁injusta 1 +▁planteó 1 +▁abejas 1 +▁servido 1 +▁certifica 1 +▁Schengen 1 +▁agotamiento 1 +▁comisaría 1 +▁signatura 1 +prendió 1 +▁Adquisiciones 1 +▁audiovisual 1 +▁Street 1 +▁ofrecía 1 +▁molesta 1 +Podría 1 +▁tarda 1 +Direct 1 +▁núm 1 +▁Inmigración 1 +▁Nuclear 1 +▁héroe 1 +▁idónea 1 +▁preferible 1 +▁intencional 1 +▁engaño 1 +control 1 +▁musulmana 1 +▁pidiendo 1 +▁curiosidad 1 +▁inminente 1 +▁AOD 1 +▁vegetal 1 +▁emitida 1 +▁Seguir 1 +▁abundante 1 +▁contemporáneo 1 +▁Women 1 +▁indirecto 1 +iéndolo 1 +▁Association 1 +▁Sumario 1 +▁inclina 1 +▁detrimento 1 +▁Debate 1 +▁moderado 1 +▁Arbitraje 1 +▁Desastres 1 +▁Ecuatorial 1 +▁Gibraltar 1 +▁censura 1 +▁clínico 1 +▁posibilita 1 +▁domingo 1 +▁repatriados 1 +▁Encomia 1 +▁angular 1 +▁introdujo 1 +▁represalia 1 +▁climática 1 +▁convincente 1 +▁pesada 1 +▁alimento 1 +▁aproveche 1 +▁cumplía 1 +▁prohibiciones 1 +▁riguroso 1 +▁vanguardia 1 +▁Abdul 1 +Corr 1 +grupo 1 +text 1 +▁consume 1 +▁Incluye 1 +▁desacuerdo 1 +▁exacerba 1 +▁introductoria 1 +▁utilizó 1 +▁reasign 1 +▁inútil 1 +▁resistente 1 +▁Vicente 1 +▁ballena 1 +▁honra 1 +▁químico 1 +▁Brunei 1 +▁discrepancia 1 +▁extremismo 1 +▁franceses 1 +▁dividir 1 +▁síntesis 1 +▁DVD 1 +▁huevo 1 +▁empuja 1 +▁compatibilidad 1 +▁descubrió 1 +▁disminuyó 1 +▁urbanización 1 +▁reunificación 1 +▁Charles 1 +▁remitido 1 +▁confusión 1 +▁Parque 1 +evaluación 1 +▁Fundamentales 1 +▁Ministra 1 +▁comandante 1 +▁comunique 1 +▁reunieron 1 +▁apelaciones 1 +▁Condena 1 +▁Organiza 1 +resoluciones 1 +▁encaminado 1 +▁Erradicación 1 +▁Excelencia 1 +▁PRESIDENTE 1 +▁exactitud 1 +▁insectos 1 +▁matemático 1 +▁microcrédito 1 +▁predecesor 1 +▁dormir 1 +▁extinción 1 +struyó 1 +▁marginación 1 +▁presidido 1 +▁Viernes 1 +▁humilla 1 +▁rehenes 1 +▁pudimos 1 +▁Multi 1 +▁1.0 1 +▁sucesor 1 +▁Jurídico 1 +▁Básicamente 1 +▁Permítaseme 1 +▁Pobreza 1 +▁Soviética 1 +▁Belgrado 1 +▁Enmienda 1 +▁supervisor 1 +▁Solicita 1 +▁Times 1 +▁emoción 1 +▁turca 1 +▁catalizador 1 +▁descubrimos 1 +▁sospechosas 1 +▁subasta 1 +▁Steve 1 +CERD 1 +▁rehabilit 1 +▁Profesor 1 +▁literatura 1 +▁remunerado 1 +▁alemanes 1 +▁escalera 1 +▁rectifica 1 +▁probado 1 +$ 1 +▁Apoyamos 1 +▁Ninguna 1 +▁pájaro 1 +▁University 1 +▁propongo 1 +▁filtro 1 +▁suplente 1 +▁quisiéramos 1 +▁deteriora 1 +▁lentamente 1 +▁turistas 1 +▁punible 1 +▁Claro 1 +▁Twitter 1 +▁reconocieron 1 +▁Jueves 1 +▁generosa 1 +▁contradicción 1 +▁Abeba 1 +▁belga 1 +▁concesiones 1 +▁estabilizar 1 +institucionales 1 +Alemania 1 +́ 1 +▁UNAMSIL 1 +▁asamblea 1 +▁encarecidamente 1 +▁progresivo 1 +▁refrigera 1 +р 1 +▁afgana 1 +Federación 1 +▁atrocidades 1 +▁placa 1 +▁accesibilidad 1 +▁apátrida 1 +▁multisectorial 1 +▁quinquenal 1 +▁reflejo 1 +▁descarta 1 +http 1 +europe 1 +atlántica 1 +▁Cercano 1 +▁Gabinete 1 +▁interactuar 1 +▁llegué 1 +▁prototipo 1 +▁referendo 1 +▁Situado 1 +▁espejo 1 +▁insumos 1 +▁Prensa 1 +▁imprevistos 1 +▁tóxicos 1 +▁comenzamos 1 +▁recesión 1 +▁construida 1 
+▁Espera 1 +▁Detallada 1 +▁aborígenes 1 +▁excombatientes 1 +▁inmunización 1 +▁arrastra 1 +▁Edición 1 +▁tecla 1 +▁Obviamente 1 +▁Service 1 +▁progresar 1 +▁prudente 1 +▁tormenta 1 +▁Tokio 1 +▁descuento 1 +▁invertido 1 +▁Court 1 +tuvimos 1 +torgamiento 1 +▁Darussalam 1 +▁introduzca 1 +▁suplementario 1 +▁Minorías 1 +▁escoger 1 +▁hereda 1 +▁mixta 1 +▁HUMANOS 1 +▁Salomón 1 +▁abstenerse 1 +▁atraviesa 1 +▁soberana 1 +▁sábado 1 +▁colonos 1 +▁tentativa 1 +▁Funciona 1 +▁vestido 1 +comunicación 1 +▁Harvard 1 +▁Tuvalu 1 +▁biotecnología 1 +▁inglesa 1 +▁restauración 1 +▁subterránea 1 +▁encomendado 1 +▁exigía 1 +▁corona 1 +▁conozco 1 +▁embajador 1 +▁táctica 1 +▁convicciones 1 +▁Amazon 1 +▁barata 1 +▁emerge 1 +ò 1 +▁relevancia 1 +▁Rumanía 1 +▁tardía 1 +▁asentamiento 1 +▁indiscriminado 1 +▁metilbromuro 1 +▁vecindad 1 +▁persigue 1 +▁Continua 1 +¿ 1 +Francia 1 +▁adulto 1 +▁sostuvo 1 +▁imperante 1 +▁giro 1 +▁salvaguardia 1 +▁hormiga 1 +▁perdieron 1 +▁restaurar 1 +▁CFC 1 +▁muerta 1 +‘ 1 +▁Agrícola 1 +▁apariencia 1 +▁escuchó 1 +▁Barroso 1 +▁adulta 1 +▁culto 1 +▁grita 1 +▁emociona 1 +▁Instrumento 1 +▁Palestino 1 +{ 1 +▁promovido 1 +▁esclavos 1 +▁impedido 1 +▁vidrio 1 +▁prospera 1 +▁medicamento 1 +oficina 1 +▁Preparatoria 1 +▁Xenofobia 1 +▁autobuses 1 +▁fortaleza 1 +▁hostigamiento 1 +▁innecesario 1 +▁insostenible 1 +▁necesito 1 +▁tranquilidad 1 +▁subvención 1 +▁jugando 1 +▁octava 1 +▁Juegos 1 +▁ligero 1 +▁esclarec 1 +▁plaga 1 +▁Deporte 1 +▁inmobiliario 1 +▁sufriendo 1 +▁truco 1 +▁recibo 1 +▁Center 1 +parlamentaria 1 +Relator 1 +} 1 +± 1 +ê 1 +č 1 +š 1 +у 1 +л 1 +Ñ 1 +к 1 +м 1 +ø 1 +â 1 +п 1 +д 1 +· 1 +ï 1 +ì 1 +î 1 +å 1 +ë 1 +Р 1 +ل 1 +я 1 +ы 1 +б 1 +َ 1 +― 1 +з 1 +ي 1 +` 1 +є 1 +г 1 +„ 1 +∗ 1 +й 1 +ь 1 +В 1 +ß 1 +ž 1 +μ 1 +Ѓ 1 +§ 1 +ù 1 +‰ 1 +< 1 +ş 1 +« 1 +ł 1 +\ 1 +õ 1 +ð 1 +ا 1 +Ö 1 +Č 1 +х 1 +û 1 +Ο 1 +ć 1 +£ 1 +ă 1 +æ 1 +α 1 +ю 1 +‹ 1 +ā 1 +‚ 1 +ė 1 +ã 1 +ę 1 +Û 1 +Ü 1 +ı 1 +~ 1 +ш 1 +Å 1 +ر 1 + 1 +• 1 +Ç 1 +ŷ 1 +ι 1 +Ž 1 +œ 1 +─ 1 +ý 1 +Ä 1 +ו 1 +Ⴗ 1 +ت 1 +ф 1 +σ 1 +ن 1 +→ 1 +ą 1 +− 1 +‡ 1 +ο 1 +τ 1 +щ 1 +э 1 +ε 1 +ب 1 +^ 1 +ğ 1 +ś 1 +ż 1 +م 1 +ה 1 +ň 1 +È 1 +ъ 1 +¦ 1 +Ş 1 +Т 1 +ő 1 +● 1 +ѓ 1 +κ 1 +د 1 +ة 1 +ÿ 1 +і 1 +Џ 1 +ν 1 +К 1 +ש 1 +▪ 1 +À 1 +و 1 +ī 1 +ĝ 1 +ō 1 +ْ 1 +ū 1 +† 1 +υ 1 +О 1 +λ 1 +И 1 +ό 1 +י 1 +ל 1 +π 1 +η 1 +Н 1 +İ 1 +І 1 +س 1 +خ 1 +ع 1 +מ 1 +ك 1 +β 1 +ח 1 +语 1 +Ì 1 +ί 1 +П 1 +ા 1 +ר 1 +А 1 +ב 1 +נ 1 +ρ 1 +ά 1 +М 1 +ŝ 1 +ŭ 1 +Ò 1 +× 1 +ת 1 +■ 1 +Ê 1 +ź 1 +ٌ 1 +¥ 1 +̊ 1 +ї 1 +Е 1 +δ 1 +ĉ 1 +ע 1 +þ 1 +Ł 1 +Ε 1 +ّ 1 +ન 1 +χ 1 +Đ 1 +ه 1 +“ 1 +ς 1 +Х 1 +פ 1 +، 1 +ح 1 +ढ 1 +ी 1 +ो 1 +′ 1 +¢ 1 +Ι 1 +ف 1 +У 1 +θ 1 +γ 1 +¬ 1 +א 1 +ط 1 +ો 1 +÷ 1 +Κ 1 +З 1 +ى 1 +ی 1 +ा 1 +ર 1 +อ 1 +ķ 1 +¤ 1 +ય 1 +ق 1 +ȣ 1 +Ф 1 +ק 1 +ص 1 +े 1 +‐ 1 +≈ 1 +○ 1 +★ 1 +北 1 +Ï 1 +ή 1 +ד 1 +ش 1 +म 1 +ી 1 +غ 1 +我 1 +个 1 +你 1 +Ч 1 +Ш 1 +Ë 1 +Α 1 +Б 1 +જ 1 +¶ 1 +Ô 1 +ų 1 +ё 1 +ם 1 +ث 1 +ं 1 +ई 1 +ड 1 +त 1 +य 1 +र 1 +ि 1 +ં 1 +ે 1 +્ 1 +ဪ 1 +◆ 1 +♲ 1 +ē 1 +आ 1 +ક 1 +ુ 1 +神 1 +ز 1 +่ 1 +牙 1 +以 1 +的 1 +妈 1 +在 1 +Л 1 +一 1 +ύ 1 +中 1 +भ 1 +海 1 +ṛ 1 +Ý 1 +ģ 1 +Ń 1 +ť 1 +ů 1 +ǎ 1 +ǵ 1 +Ν 1 +Ρ 1 +έ 1 +ξ 1 +ω 1 +ћ 1 +ג 1 +ך 1 +ן 1 +ض 1 +ڤ 1 +ग 1 +ज 1 +थ 1 +न 1 +ગ 1 +ચ 1 +દ 1 +ย 1 +ร 1 +ၝ 1 +√ 1 +≤ 1 +♦ 1 +❑ 1 +。 1 +』 1 +不 1 +与 1 +出 1 +台 1 +啊 1 +正 1 +洋 1 +ј 1 +ु 1 +સ 1 +ṣ 1 +大 1 +法 1 +西 1 +香 1 +面 1 +行 1 +游 1 +港 1 +来 1 +执 1 +地 1 +历 1 +决 1 +内 1 +仲 1 +了 1 +► 1 +∑ 1 +ၛ 1 +ષ 1 +વ 1 +લ 1 +પ 1 +ધ 1 +ડ 1 +। 1 +् 1 +ह 1 +צ 1 +כ 1 +Ц 1 +Ř 1 +Œ 1 +ĵ 1 +Ĉ 1 +有 1 +‛ 1 +क 1 +ז 1 +Є 1 +Μ 1 +ʾ 1 +ľ 1 +裁 1 +ج 1 +إ 1 +Э 1 +Д 1 +Σ 1 +़ 1 +ќ 1 +Î 1 +أ 1 +њ 1 +※ 1 +љ 1 +Ő 1 +” 1 +Ţ 1 +Ј 1 +̧ 1 +Ś 1 +♫ 1 +ţ 1 +Ć 1 +प 1 +— 1 +đ 1 +› 1 +Ŷ 1 +Ø 1 +ě 1 +؟ 1 +ř 1 +Љ 
1 +Њ 1 +⁄ 1 +ц 1 +ж 1 +Ќ 1 +̈ 1 +Ћ 1 +之 1 +制 1 +发 1 +展 1 +度 1 +ч 1 +Ђ 1 +ń 1 +互 1 +相 1 +下 1 +可 1 +唱 1 +教 1 +曲 1 +歌 1 +给 1 +φ 1 +Й 1 +京 1 +公 1 +名 1 +声 1 +多 1 +居 1 +工 1 +很 1 +心 1 +方 1 +晟 1 +結 1 +网 1 +苑 1 +બ 1 +ṭ 1 +山 1 +政 1 +果 1 +白 1 +社 1 +铭 1 +ű 1 +Ÿ 1 +ǻ 1 +₤ 1 +持 1 +­ 1 +Æ 1 +Ā 1 +Ă 1 +ċ 1 +ď 1 +į 1 +Ľ 1 +ņ 1 +ŕ 1 +Ū 1 +ƒ 1 +ǒ 1 +Β 1 +Υ 1 +Φ 1 +Ω 1 +ζ 1 +Ѕ 1 +Ж 1 +Ы 1 +Я 1 +ט 1 +ף 1 +ؤ 1 +ً 1 +ख 1 +स 1 +એ 1 +ખ 1 +ણ 1 +થ 1 +મ 1 +શ 1 +િ 1 +ธ 1 +ศ 1 +ะ 1 +ใ 1 +ၡ 1 +ၢ 1 +ṇ 1 +ẹ 1 +ọ 1 +ờ 1 +‒ 1 +⇒ 1 +⇢ 1 +∙ 1 +≥ 1 +□ 1 +◙ 1 +◦ 1 +◯ 1 +♣ 1 +『 1 +い 1 +お 1 +し 1 +も 1 +ろ 1 +世 1 +並 1 +为 1 +伯 1 +作 1 +修 1 +做 1 +农 1 +几 1 +分 1 +剥 1 +参 1 +合 1 +和 1 +咬 1 +喜 1 +嘛 1 +外 1 +太 1 +妨 1 +姐 1 +完 1 +寓 1 +局 1 +帝 1 +年 1 +建 1 +後 1 +徳 1 +情 1 +慧 1 +成 1 +所 1 +拉 1 +探 1 +星 1 +木 1 +松 1 +比 1 +燰 1 +特 1 +王 1 +甜 1 +生 1 +界 1 +眼 1 +租 1 +等 1 +紧 1 +美 1 +翻 1 +臺 1 +色 1 +茶 1 +葱 1 +藤 1 +要 1 +见 1 +视 1 +角 1 +言 1 +請 1 +设 1 +译 1 +课 1 +赠 1 +路 1 +载 1 +農 1 +连 1 +送 1 +這 1 +鏮 1 +鑚 1 +镜 1 +问 1 +阳 1 +陈 1 +院 1 +Š 1 +Ė 1 +Ķ 1 +ț 1 +ذ 1 +આ 1 +พ 1 +↕ 1 +⇕ 1 +炎 1 +Ę 1 +ļ 1 +હ 1 +三 1 +取 1 +寄 1 +甸 1 +返 1 +イ 1 +オ 1 +セ 1 +ッ 1 +デ 1 +Ў 1 +俄 1 +利 1 +德 1 +意 1 +日 1 +汉 1 +班 1 +英 1 +萄 1 +葡 1 +阿 1 + 1 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_es/spm_unigram10000.model b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/spm_unigram10000.model new file mode 100644 index 0000000000000000000000000000000000000000..0e4b629ec2ad96b86fc185ba4d56b48470a18983 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_es/spm_unigram10000.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06d8938b989c1816cfd96c05c6db0f9f23f420ad0d7ac966857cd8b29b2ce572 +size 417070 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/config.yaml b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dce5f63011a8c33a4d12eec569fdcc91ea299f68 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/config.yaml @@ -0,0 +1,3 @@ +vocab_filename: dict.spm.txt +src_vocab_filename: dict.kmu.txt + diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/config_enfr.yaml b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/config_enfr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd080a05500211cade57d80056c8ce311ce4c0c2 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/config_enfr.yaml @@ -0,0 +1,14 @@ +bpe_tokenizer: + bpe: sentencepiece + sentencepiece_model: spm_unigram10000.model + +sampling_alpha: 1.0 +shuffle: false +use_audio_input: true +use_sample_rate: 16000 + +vocab_filename: dict.spm.txt + +# required by speech_to_text task but never used +input_channels: 1 +input_feat_per_channel: 1 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/dict.kmu.txt b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/dict.kmu.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbfe59e554d6234f3631d8d09d9281c2160f4675 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/dict.kmu.txt @@ -0,0 +1,500 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 +50 50 +51 51 +52 52 +53 53 +54 54 +55 55 +56 56 +57 57 +58 58 +59 59 +60 60 +61 61 +62 62 +63 63 +64 64 +65 65 +66 66 +67 67 +68 68 +69 69 +70 70 +71 71 +72 72 +73 73 +74 74 +75 75 +76 76 +77 77 +78 78 +79 79 +80 80 +81 81 +82 82 +83 83 +84 84 +85 85 +86 86 +87 87 +88 88 +89 89 
+90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 +110 110 +111 111 +112 112 +113 113 +114 114 +115 115 +116 116 +117 117 +118 118 +119 119 +120 120 +121 121 +122 122 +123 123 +124 124 +125 125 +126 126 +127 127 +128 128 +129 129 +130 130 +131 131 +132 132 +133 133 +134 134 +135 135 +136 136 +137 137 +138 138 +139 139 +140 140 +141 141 +142 142 +143 143 +144 144 +145 145 +146 146 +147 147 +148 148 +149 149 +150 150 +151 151 +152 152 +153 153 +154 154 +155 155 +156 156 +157 157 +158 158 +159 159 +160 160 +161 161 +162 162 +163 163 +164 164 +165 165 +166 166 +167 167 +168 168 +169 169 +170 170 +171 171 +172 172 +173 173 +174 174 +175 175 +176 176 +177 177 +178 178 +179 179 +180 180 +181 181 +182 182 +183 183 +184 184 +185 185 +186 186 +187 187 +188 188 +189 189 +190 190 +191 191 +192 192 +193 193 +194 194 +195 195 +196 196 +197 197 +198 198 +199 199 +200 200 +201 201 +202 202 +203 203 +204 204 +205 205 +206 206 +207 207 +208 208 +209 209 +210 210 +211 211 +212 212 +213 213 +214 214 +215 215 +216 216 +217 217 +218 218 +219 219 +220 220 +221 221 +222 222 +223 223 +224 224 +225 225 +226 226 +227 227 +228 228 +229 229 +230 230 +231 231 +232 232 +233 233 +234 234 +235 235 +236 236 +237 237 +238 238 +239 239 +240 240 +241 241 +242 242 +243 243 +244 244 +245 245 +246 246 +247 247 +248 248 +249 249 +250 250 +251 251 +252 252 +253 253 +254 254 +255 255 +256 256 +257 257 +258 258 +259 259 +260 260 +261 261 +262 262 +263 263 +264 264 +265 265 +266 266 +267 267 +268 268 +269 269 +270 270 +271 271 +272 272 +273 273 +274 274 +275 275 +276 276 +277 277 +278 278 +279 279 +280 280 +281 281 +282 282 +283 283 +284 284 +285 285 +286 286 +287 287 +288 288 +289 289 +290 290 +291 291 +292 292 +293 293 +294 294 +295 295 +296 296 +297 297 +298 298 +299 299 +300 300 +301 301 +302 302 +303 303 +304 304 +305 305 +306 306 +307 307 +308 308 +309 309 +310 310 +311 311 +312 312 +313 313 +314 314 +315 315 +316 316 +317 317 +318 318 +319 319 +320 320 +321 321 +322 322 +323 323 +324 324 +325 325 +326 326 +327 327 +328 328 +329 329 +330 330 +331 331 +332 332 +333 333 +334 334 +335 335 +336 336 +337 337 +338 338 +339 339 +340 340 +341 341 +342 342 +343 343 +344 344 +345 345 +346 346 +347 347 +348 348 +349 349 +350 350 +351 351 +352 352 +353 353 +354 354 +355 355 +356 356 +357 357 +358 358 +359 359 +360 360 +361 361 +362 362 +363 363 +364 364 +365 365 +366 366 +367 367 +368 368 +369 369 +370 370 +371 371 +372 372 +373 373 +374 374 +375 375 +376 376 +377 377 +378 378 +379 379 +380 380 +381 381 +382 382 +383 383 +384 384 +385 385 +386 386 +387 387 +388 388 +389 389 +390 390 +391 391 +392 392 +393 393 +394 394 +395 395 +396 396 +397 397 +398 398 +399 399 +400 400 +401 401 +402 402 +403 403 +404 404 +405 405 +406 406 +407 407 +408 408 +409 409 +410 410 +411 411 +412 412 +413 413 +414 414 +415 415 +416 416 +417 417 +418 418 +419 419 +420 420 +421 421 +422 422 +423 423 +424 424 +425 425 +426 426 +427 427 +428 428 +429 429 +430 430 +431 431 +432 432 +433 433 +434 434 +435 435 +436 436 +437 437 +438 438 +439 439 +440 440 +441 441 +442 442 +443 443 +444 444 +445 445 +446 446 +447 447 +448 448 +449 449 +450 450 +451 451 +452 452 +453 453 +454 454 +455 455 +456 456 +457 457 +458 458 +459 459 +460 460 +461 461 +462 462 +463 463 +464 464 +465 465 +466 466 +467 467 +468 468 +469 469 +470 470 +471 471 +472 472 +473 473 +474 474 +475 475 +476 476 +477 477 +478 478 +479 479 +480 480 +481 481 +482 482 +483 483 +484 484 +485 485 +486 486 
+487 487 +488 488 +489 489 +490 490 +491 491 +492 492 +493 493 +494 494 +495 495 +496 496 +497 497 +498 498 +499 499 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/dict.spm.txt b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/dict.spm.txt new file mode 100644 index 0000000000000000000000000000000000000000..db33a0589715d19a009ac803a74623e7aa436f39 --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/dict.spm.txt @@ -0,0 +1,9997 @@ +▁de 1 +, 1 +' 1 +s 1 +. 1 +▁la 1 +▁et 1 +▁l 1 +▁des 1 +▁les 1 +▁à 1 +▁d 1 +▁le 1 +▁ 1 +▁du 1 +▁en 1 +’ 1 +▁que 1 +e 1 +- 1 +▁pour 1 +▁dans 1 +▁a 1 +▁un 1 +▁sur 1 +▁au 1 +▁qui 1 +▁une 1 +▁par 1 +▁( 1 +▁est 1 +) 1 +es 1 +▁aux 1 +▁ou 1 +▁qu 1 +▁pas 1 +▁ce 1 +r 1 +▁plus 1 +▁Le 1 +▁s 1 +▁sont 1 +a 1 +est 1 +er 1 +▁n 1 +▁ne 1 +un 1 +▁nous 1 +▁: 1 +▁avec 1 +▁ont 1 +il 1 +▁Les 1 +▁La 1 +▁L 1 +▁se 1 +▁été 1 +/ 1 +▁vous 1 +une 1 +▁il 1 +▁C 1 +ment 1 +▁c 1 +; 1 +▁être 1 +nt 1 +▁Il 1 +▁ces 1 +▁cette 1 +é 1 +▁leur 1 +t 1 +on 1 +▁comme 1 +▁fait 1 +▁Canada 1 +ant 1 +▁« 1 +▁pays 1 +▁M 1 +▁son 1 +en 1 +▁je 1 +y 1 +▁sa 1 +▁- 1 +▁y 1 +▁En 1 +▁si 1 +▁S 1 +▁A 1 +▁peut 1 +S 1 +▁entre 1 +▁faire 1 +▁• 1 +▁1 1 +▁mais 1 +A 1 +ent 1 +▁tout 1 +▁développement 1 +c 1 +▁D 1 +▁tous 1 +ai 1 +▁Et 1 +▁ses 1 +ait 1 +: 1 +d 1 +▁rapport 1 +▁Conseil 1 +▁j 1 +o 1 +▁travail 1 +▁non 1 +▁États 1 +▁même 1 +ci 1 +▁B 1 +▁droits 1 +▁leurs 1 +▁deux 1 +▁on 1 +re 1 +▁ainsi 1 +). 1 +▁aussi 1 +▁Je 1 +ons 1 +▁services 1 +▁Commission 1 +in 1 +▁Nous 1 +▁bien 1 +C 1 +ation 1 +▁également 1 +▁cas 1 +z 1 +autres 1 +▁F 1 +▁in 1 +▁» 1 +u 1 +n 1 +▁dont 1 +i 1 +▁p 1 +▁? 1 +), 1 +▁personnes 1 +▁Comité 1 +▁sous 1 +▁sécurité 1 +▁membres 1 +▁T 1 +▁J 1 +l 1 +▁P 1 +le 1 +▁projet 1 +▁où 1 +▁droit 1 +▁santé 1 +▁autres 1 +P 1 +▁2 1 +f 1 +al 1 +▁était 1 +▁doit 1 +▁très 1 +▁contre 1 +▁partie 1 +m 1 +ur 1 +▁Nations 1 +D 1 +ra 1 +▁% 1 +▁" 1 +▁3 1 +▁R 1 +M 1 +an 1 +▁Dans 1 +h 1 +▁programme 1 +▁cadre 1 +▁cours 1 +▁soit 1 +▁dé 1 +▁Unies 1 +▁sans 1 +B 1 +article 1 +elle 1 +▁notre 1 +▁elle 1 +▁re 1 +▁m 1 +▁général 1 +▁mesures 1 +▁G 1 +▁compte 1 +▁avons 1 +p 1 +© 1 +ils 1 +▁activités 1 +▁ré 1 +▁demande 1 +▁ans 1 +és 1 +▁afin 1 +▁question 1 +▁système 1 +▁nombre 1 +▁données 1 +E 1 +à 1 +or 1 +▁5 1 +ée 1 +b 1 +L 1 +▁mise 1 +▁toutes 1 +▁peuvent 1 +▁Ce 1 +it 1 +ont 1 +▁b 1 +▁monde 1 +▁femmes 1 +ez 1 +▁4 1 +aient 1 +▁Pour 1 +▁— 1 +▁matière 1 +▁politique 1 +▁point 1 +▁moins 1 +information 1 +ou 1 +être 1 +▁vie 1 +▁recherche 1 +▁10 1 +▁E 1 +▁ils 1 +▁enfants 1 +▁jour 1 +▁votre 1 +▁On 1 +▁temps 1 +O 1 +▁gestion 1 +▁avait 1 +▁De 1 +▁19 1 +▁concernant 1 +▁questions 1 +T 1 +N 1 +▁À 1 +▁gouvernement 1 +▁façon 1 +us 1 +▁Si 1 +v 1 +▁ressources 1 +▁Mais 1 +▁nos 1 +▁générale 1 +▁niveau 1 +F 1 +H 1 +à 1 +▁autre 1 +R 1 +▁produits 1 +▁vue 1 +▁doivent 1 +▁donc 1 +▁6 1 +▁programmes 1 +G 1 +▁lui 1 +▁processus 1 +au 1 +▁internationale 1 +ne 1 +▁protection 1 +▁N 1 +▁avoir 1 +▁politiques 1 +▁toute 1 +▁notamment 1 +ce 1 +▁plan 1 +▁ça 1 +▁fois 1 +▁cet 1 +▁dit 1 +2 1 +] 1 +▁Un 1 +▁me 1 +homme 1 +▁encore 1 +▁cela 1 +is 1 +▁– 1 +▁international 1 +▁chaque 1 +▁mon 1 +▁paragraphe 1 +aux 1 +▁ma 1 +▁session 1 +▁économique 1 +g 1 +▁[ 1 +▁K 1 +▁sera 1 +▁européenne 1 +▁avant 1 +▁H 1 +▁résolution 1 +▁après 1 +ité 1 +1 1 +▁7 1 +at 1 +▁Par 1 +▁trois 1 +ar 1 +▁années 1 +age 1 +ir 1 +eau 1 +te 1 +▁fin 1 +ement 1 +▁base 1 +▁situation 1 +▁personne 1 +▁lieu 1 +État 1 +ées 1 +▁Convention 1 +▁Une 1 +▁pro 1 +▁résultats 1 +k 1 +▁Cette 1 +▁20 1 +▁période 1 +▁groupe 1 +▁gens 1 +▁loi 1 +▁place 1 +▁8 1 +▁décision 1 +▁9 1 +▁selon 1 +▁mesure 1 +▁I 1 +▁Au 1 +3 1 +id 1 +▁beaucoup 1 +▁formation 1 +▁15 1 
+▁exemple 1 +▁peu 1 +ance 1 +▁présent 1 +té 1 +ca 1 +▁h 1 +▁titre 1 +▁V 1 +▁secteur 1 +▁manière 1 +▁œuvre 1 +▁tant 1 +ing 1 +▁renseignements 1 +ais 1 +▁particulier 1 +▁Elle 1 +? 1 +▁première 1 +action 1 +▁000 1 +ie 1 +▁pré 1 +" 1 +▁of 1 +▁part 1 +▁coopération 1 +▁prendre 1 +▁car 1 +▁pendant 1 +▁mettre 1 +▁public 1 +▁W 1 +▁devrait 1 +▁certains 1 +Assemblée 1 +▁compris 1 +▁Président 1 +ch 1 +▁g 1 +▁Vous 1 +Union 1 +▁étaient 1 +application 1 +▁contrôle 1 +▁inter 1 +▁suis 1 +▁Ces 1 +▁Loi 1 +▁société 1 +▁12 1 +▁rôle 1 +▁service 1 +▁depuis 1 +▁grande 1 +▁qualité 1 +▁ceux 1 +▁national 1 +( 1 +▁nouveau 1 +▁parce 1 +▁’ 1 +évaluation 1 +▁marché 1 +▁plusieurs 1 +ut 1 +▁voir 1 +▁Groupe 1 +▁personnel 1 +4 1 +▁faut 1 +▁dollars 1 +▁grand 1 +les 1 +▁domaine 1 +▁Des 1 +▁seulement 1 +▁dispositions 1 +▁f 1 +▁chose 1 +Unis 1 +V 1 +▁con 1 +▁quand 1 +▁jusqu 1 +▁parties 1 +▁$ 1 +la 1 +▁conditions 1 +aire 1 +▁besoin 1 +ad 1 +était 1 +▁projets 1 +▁savoir 1 +de 1 +▁publique 1 +▁18 1 +ro 1 +▁millions 1 +ex 1 +▁nom 1 +▁eu 1 +0 1 +um 1 +el 1 +aide 1 +▁entreprises 1 +▁important 1 +hui 1 +▁vers 1 +▁fonction 1 +▁premier 1 +▁possible 1 +▁laquelle 1 +▁11 1 +ter 1 +▁nationale 1 +▁population 1 +▁Secrétaire 1 +▁document 1 +▁30 1 +▁région 1 +ine 1 +▁nouvelle 1 +▁pourrait 1 +able 1 +et 1 +▁objectifs 1 +▁Re 1 +▁moment 1 +00 1 +im 1 +▁présente 1 +▁problèmes 1 +U 1 +▁ci 1 +6 1 +▁O 1 +ri 1 +elles 1 +environnement 1 +▁mois 1 +ac 1 +▁alors 1 +▁groupes 1 +▁travaux 1 +▁taux 1 +▁13 1 +emploi 1 +5 1 +di 1 +▁va 1 +▁Programme 1 +am 1 +▁lors 1 +▁conformément 1 +▁organisations 1 +oc 1 +Organisation 1 +▁nouvelles 1 +▁paix 1 +▁sujet 1 +▁déjà 1 +▁ex 1 +▁concerne 1 +▁seront 1 +x 1 +▁raison 1 +th 1 +▁organismes 1 +▁date 1 +7 1 +w 1 +que 1 +▁co 1 +▁soient 1 +▁Ils 1 +▁toujours 1 +▁> 1 +examen 1 +utilisation 1 +li 1 +▁vertu 1 +▁produit 1 +▁dire 1 +▁visant 1 +▁sommes 1 +▁problème 1 +▁site 1 +▁DE 1 +▁elles 1 +▁règlement 1 +▁mis 1 +▁moyen 1 +▁quelque 1 +eur 1 +▁14 1 +▁ayant 1 +op 1 +▁informations 1 +ul 1 +ions 1 +▁efforts 1 +▁and 1 +▁r 1 +▁décembre 1 +▁nouveaux 1 +▁suite 1 +autre 1 +ins 1 +me 1 +I 1 +▁the 1 +▁financement 1 +éducation 1 +É 1 +par 1 +▁besoins 1 +▁pouvoir 1 +▁donné 1 +▁é 1 +▁chez 1 +ton 1 +▁celui 1 +▁production 1 +▁comment 1 +▁certaines 1 +▁Bureau 1 +▁celle 1 +▁In 1 +▁terme 1 +▁année 1 +ale 1 +▁serait 1 +ta 1 +ol 1 +▁relatives 1 +▁documents 1 +ion 1 +▁vraiment 1 +▁mondiale 1 +Rires 1 +▁tenu 1 +ordre 1 +▁effet 1 +▁forme 1 +▁; 1 +▁transport 1 +▁pris 1 +ot 1 +▁participation 1 +che 1 +▁là 1 +▁maintenant 1 +▁canadienne 1 +▁choses 1 +▁juin 1 +▁risque 1 +ique 1 +▁durable 1 +▁quelques 1 +▁prix 1 +administration 1 +▁16 1 +ill 1 +.1 1 +art 1 +rait 1 +▁Gouvernement 1 +▁valeur 1 +ré 1 +▁canadien 1 +▁soutien 1 +▁ni 1 +▁nécessaire 1 +▁République 1 +▁lorsque 1 +▁2005 1 +▁outre 1 +om 1 +eux 1 +9 1 +▁font 1 +▁mieux 1 +▁type 1 +▁2006 1 +▁traitement 1 +▁respect 1 +▁donner 1 +» 1 +▁jeunes 1 +accès 1 +ensemble 1 +eurs 1 +▁long 1 +ér 1 +ront 1 +▁17 1 +ti 1 +▁rapports 1 +▁communauté 1 +▁2007 1 +▁cause 1 +après 1 +lo 1 +▁pouvez 1 +▁fonds 1 +▁social 1 +▁v 1 +▁dis 1 +▁ici 1 +ner 1 +▁devraient 1 +con 1 +agit 1 +▁institutions 1 +▁sein 1 +▁Parlement 1 +▁européen 1 +8 1 +▁commerce 1 +ig 1 +qu 1 +ence 1 +▁Con 1 +▁note 1 +▁heures 1 +▁avaient 1 +ic 1 +▁Mme 1 +▁no 1 +è 1 +▁aujourd 1 +▁moi 1 +ay 1 +▁capacité 1 +▁Cour 1 +ier 1 +▁0 1 +▁Conférence 1 +▁mars 1 +▁mandat 1 +▁dépenses 1 +▁septembre 1 +▁100 1 +▁liste 1 +▁2004 1 +os 1 +▁réunion 1 +▁ministre 1 +▁montant 1 +▁ch 1 +ES 1 +if 1 +▁Ministère 1 +▁but 1 +industrie 1 +▁demandé 1 +▁création 1 +▁créer 1 +ve 1 +▁affaires 1 
+▁budget 1 +AC 1 +▁nécessaires 1 +▁mai 1 +Europe 1 +▁po 1 +▁spécial 1 +▁fournir 1 +année 1 +▁procédure 1 +ure 1 +▁quatre 1 +▁systèmes 1 +▁to 1 +▁avez 1 +.2 1 +▁sens 1 +min 1 +▁É 1 +accord 1 +▁internationales 1 +▁jours 1 +▁auprès 1 +▁souvent 1 +▁sociale 1 +▁sorte 1 +▁famille 1 +▁25 1 +▁recommandations 1 +▁étant 1 +vi 1 +▁normes 1 +▁éléments 1 +▁renforcer 1 +▁pratiques 1 +▁différents 1 +▁juste 1 +▁technique 1 +ag 1 +▁cinq 1 +▁lutte 1 +ants 1 +▁permis 1 +▁celles 1 +isation 1 +▁croissance 1 +dé 1 +▁déclaration 1 +exercice 1 +▁moyens 1 +▁Donc 1 +co 1 +▁ra 1 +▁trouver 1 +▁plupart 1 +▁juillet 1 +▁communication 1 +▁Cela 1 +▁bon 1 +▁soins 1 +voir 1 +K 1 +▁début 1 +aires 1 +ard 1 +ia 1 +lé 1 +ab 1 +▁fins 1 +ive 1 +▁pense 1 +▁pratique 1 +▁permet 1 +▁vos 1 +▁Rapport 1 +objet 1 +tant 1 +man 1 +▁jamais 1 +▁domaines 1 +▁suivi 1 +ed 1 +▁progrès 1 +pos 1 +to 1 +▁décisions 1 +com 1 +iv 1 +▁2008 1 +ici 1 +UE 1 +ONU 1 +J 1 +▁indiqué 1 +▁technologie 1 +▁deuxième 1 +▁2003 1 +▁21 1 +▁autorités 1 +▁partir 1 +▁24 1 +▁ligne 1 +▁Ma 1 +▁économiques 1 +▁Comme 1 +▁novembre 1 +▁proposition 1 +▁nombreux 1 +▁Centre 1 +▁haut 1 +end 1 +▁janvier 1 +▁appel 1 +élaboration 1 +▁bonne 1 +W 1 +▁2002 1 +▁eux 1 +sion 1 +entre 1 +▁». 1 +na 1 +ère 1 +sse 1 +▁régions 1 +ge 1 +▁responsabilité 1 +▁promotion 1 +appui 1 +▁aider 1 +▁accord 1 +▁opérations 1 +▁assurer 1 +▁règles 1 +▁membre 1 +_ 1 +▁aucune 1 +vous 1 +▁avril 1 +▁31 1 +▁effets 1 +▁Nord 1 +▁octobre 1 +▁adopté 1 +▁dernier 1 +Office 1 +▁reçu 1 +▁violence 1 +.3 1 +▁total 1 +▁vol 1 +CE 1 +▁référence 1 +▁stratégie 1 +▁charge 1 +▁représentants 1 +▁devant 1 +mo 1 +▁déterminer 1 +▁nature 1 +▁ministère 1 +▁réponse 1 +▁différentes 1 +énergie 1 +son 1 +▁justice 1 +% 1 +▁mes 1 +▁techniques 1 +ap 1 +▁grâce 1 +▁représentant 1 +▁an 1 +assurer 1 +▁fond 1 +as 1 +▁à 1 +▁termes 1 +▁permettre 1 +nes 1 +▁comp 1 +all 1 +▁relative 1 +▁prises 1 +ign 1 +▁», 1 +▁pu 1 +▁lorsqu 1 +▁autochtones 1 +▁environ 1 +▁2000 1 +▁Ch 1 +od 1 +▁nationales 1 +▁nationaux 1 +▁hommes 1 +▁points 1 +▁changement 1 +▁coût 1 +▁principes 1 +mi 1 +air 1 +égalité 1 +▁section 1 +▁vigueur 1 +▁relations 1 +▁reste 1 +... 
1 +intérêt 1 +▁revenu 1 +▁50 1 +▁coûts 1 +▁changements 1 +AN 1 +▁mondial 1 +▁aide 1 +▁face 1 +▁frais 1 +origine 1 +tique 1 +▁devons 1 +▁législation 1 +▁risques 1 +exécution 1 +▁propre 1 +importance 1 +exploitation 1 +▁22 1 +▁poste 1 +▁2001 1 +▁certain 1 +tra 1 +ub 1 +▁améliorer 1 +avoir 1 +égard 1 +établissement 1 +▁tels 1 +▁passé 1 +▁civile 1 +tes 1 +int 1 +▁gouvernements 1 +IS 1 +iste 1 +▁Règlement 1 +▁Tribunal 1 +▁délégation 1 +va 1 +▁commun 1 +▁pourquoi 1 +▁séance 1 +▁aucun 1 +ue 1 +▁rendre 1 +▁t 1 +ables 1 +▁promouvoir 1 +▁efficace 1 +▁divers 1 +▁près 1 +▁internationaux 1 +▁débat 1 +gu 1 +ateurs 1 +▁pourraient 1 +▁propriété 1 +▁lequel 1 +du 1 +▁régime 1 +▁avis 1 +▁per 1 +per 1 +▁dernière 1 +urs 1 +état 1 +▁importante 1 +▁procédures 1 +▁ensemble 1 +▁puis 1 +▁réseau 1 +▁discrimination 1 +▁financière 1 +▁di 1 +▁position 1 +.4 1 +istes 1 +▁réduire 1 +lu 1 +▁plutôt 1 +▁travers 1 +▁tel 1 +ER 1 +▁traité 1 +ite 1 +▁Web 1 +); 1 +ations 1 +▁participants 1 +▁centre 1 +▁No 1 +organisation 1 +ver 1 +▁sûr 1 +dic 1 +▁employés 1 +▁direction 1 +pl 1 +▁donne 1 +Y 1 +pe 1 +▁actuellement 1 +▁égard 1 +▁for 1 +▁principe 1 +ma 1 +▁président 1 +▁niveaux 1 +▁23 1 +▁St 1 +ob 1 +▁́ 1 +▁prise 1 +▁comprendre 1 +▁Pro 1 +▁Se 1 +▁approche 1 +▁obtenir 1 +▁contexte 1 +res 1 +tre 1 +IC 1 +▁examiné 1 +▁Bien 1 +▁seule 1 +▁partenaires 1 +▁pouvons 1 +▁texte 1 +▁publié 1 +▁Alors 1 +ateur 1 +be 1 +iques 1 +ante 1 +and 1 +▁présenter 1 +▁coordination 1 +▁contenu 1 +▁réduction 1 +▁culture 1 +▁Sa 1 +▁trouve 1 +▁créé 1 +▁collaboration 1 +▁fa 1 +▁territoire 1 +▁utiliser 1 +▁nécessité 1 +▁études 1 +▁juridique 1 +tu 1 +▁décidé 1 +isme 1 +▁Br 1 +org 1 +our 1 +lement 1 +activité 1 +30 1 +▁porte 1 +▁Canadiens 1 +▁matériel 1 +▁disposition 1 +▁sociaux 1 +▁fédéral 1 +▁existe 1 +ction 1 +analyse 1 +▁examen 1 +▁vu 1 +▁prévention 1 +ph 1 +▁milieu 1 +▁surveillance 1 +▁Dé 1 +adoption 1 +ssent 1 +▁soumis 1 +▁répondre 1 +▁bureau 1 +né 1 +ification 1 +▁telle 1 +50 1 +▁trop 1 +▁modèle 1 +▁capacités 1 +gi 1 +▁preuve 1 +▁secteurs 1 +euse 1 +appel 1 +▁oeuvre 1 +▁moyenne 1 +▁réalisation 1 +ist 1 +▁structure 1 +▁demandes 1 +▁présenté 1 +▁seul 1 +▁Fonds 1 +▁dernières 1 +▁exigences 1 +▁marchés 1 +enseignement 1 +▁État 1 +Agence 1 +▁tenir 1 +▁possibilité 1 +▁maintien 1 +▁davantage 1 +efficacité 1 +▁proposé 1 +abord 1 +▁participer 1 +▁déc 1 +▁Toutefois 1 +▁faveur 1 +IN 1 +▁pêche 1 +ille 1 +ticulièrement 1 +▁recours 1 +él 1 +▁initiatives 1 +▁police 1 +▁tard 1 +▁février 1 +▁propos 1 +pa 1 +économie 1 +▁ad 1 +▁guerre 1 +▁conférence 1 +assurance 1 +ga 1 +▁zone 1 +▁1999 1 +▁technologies 1 +▁durant 1 +avis 1 +▁mer 1 +:// 1 +annexe 1 +▁page 1 +go 1 +▁Ro 1 +▁suivants 1 +nd 1 +ON 1 +▁ville 1 +▁établi 1 +ten 1 +▁ailleurs 1 +▁simple 1 +vis 1 +▁compétences 1 +20 1 +▁mission 1 +yn 1 +▁nombreuses 1 +▁voie 1 +mé 1 +ois 1 +▁réserve 1 +▁conseil 1 +▁zones 1 +qui 1 +▁déclaré 1 +▁tu 1 +▁sol 1 +▁e 1 +auteur 1 +ord 1 +▁accès 1 +▁al 1 +ang 1 +▁telles 1 +ho 1 +▁vérification 1 +▁difficile 1 +iers 1 +do 1 +▁Sud 1 +ette 1 +▁quant 1 +▁numéro 1 +▁pauvreté 1 +15 1 +▁28 1 +▁large 1 +▁publics 1 +/2 1 +▁femme 1 +▁petit 1 +▁commencé 1 +▁semble 1 +▁mort 1 +▁six 1 +▁constitue 1 +igne 1 +Afrique 1 +▁vais 1 +tion 1 +ible 1 +serv 1 +▁mal 1 +même 1 +° 1 +▁comité 1 +use 1 +▁Santé 1 +▁citoyens 1 +▁choix 1 +▁parler 1 +▁définition 1 +▁26 1 +▁genre 1 +www 1 +▁principaux 1 += 1 +Uni 1 +▁suivant 1 +▁armes 1 +13 1 +▁transfert 1 +▁! 
1 +▁durée 1 +▁humaines 1 +ry 1 +▁privé 1 +▁condition 1 +▁II 1 +▁méthodes 1 +▁Monsieur 1 +▁Tout 1 +▁série 1 +▁tenue 1 +▁état 1 +▁directement 1 +▁gouvernementale 1 +hi 1 +▁Al 1 +-1 1 +▁40 1 +▁continuer 1 +▁application 1 +▁quoi 1 +▁27 1 +▁mé 1 +▁directive 1 +dire 1 +▁travailleurs 1 +▁valeurs 1 +▁fonctions 1 +tro 1 +étude 1 +ise 1 +ct 1 +▁i 1 +▁rien 1 +▁directeur 1 +▁marchandises 1 +▁diverses 1 +▁victimes 1 +11 1 +gc 1 +▁ensuite 1 +▁continue 1 +ienne 1 +▁rec 1 +da 1 +EN 1 +▁protéger 1 +▁op 1 +▁parmi 1 +▁sociétés 1 +▁langue 1 +▁passe 1 +Applaudissements 1 +form 1 +10 1 +▁aurait 1 +j 1 +▁Voici 1 +▁plans 1 +▁contribution 1 +▁Y 1 +▁o 1 +▁communautaire 1 +▁rendement 1 +▁financières 1 +▁demander 1 +▁trans 1 +port 1 +▁offre 1 +enfant 1 +Ontario 1 +▁travailler 1 +▁comprend 1 +▁Sel 1 +▁réglementation 1 +▁articles 1 +tation 1 +▁liberté 1 +▁défense 1 +▁construction 1 +▁faisant 1 +ég 1 +▁lois 1 +ib 1 +intégration 1 +▁br 1 +▁fédérale 1 +▁disponibles 1 +▁Secrétariat 1 +mar 1 +▁permanent 1 +▁statut 1 +▁Parties 1 +▁principal 1 +val 1 +rie 1 +▁aspects 1 +▁critères 1 +ec 1 +▁renforcement 1 +▁financiers 1 +▁utilisé 1 +av 1 +▁facteurs 1 +érer 1 +bi 1 +nel 1 +▁étude 1 +▁commission 1 +▁New 1 +intention 1 +▁classe 1 +entreprise 1 +▁connaissances 1 +▁réunions 1 +▁contact 1 +▁priorités 1 +▁planification 1 +ç 1 +lor 1 +▁biens 1 +▁solution 1 +▁rendu 1 +▁obligations 1 +▁main 1 +▁rend 1 +▁contribuer 1 +ales 1 +▁gaz 1 +investissement 1 +ven 1 +▁salle 1 +▁contrat 1 +▁imp 1 +urgence 1 +▁consultatif 1 +▁permettant 1 +▁anti 1 +▁Québec 1 +▁août 1 +▁élevé 1 +▁fer 1 +▁http 1 +X 1 +AT 1 +io 1 +▁centrale 1 +▁côté 1 +ide 1 +▁Z 1 +▁conséquent 1 +ate 1 +▁X 1 +▁crise 1 +ux 1 +▁cent 1 +▁langues 1 +▁propositions 1 +expérience 1 +dessus 1 +ger 1 +▁Mo 1 +AR 1 +▁canadiennes 1 +je 1 +▁Ré 1 +▁propres 1 +▁suivantes 1 +▁électronique 1 +▁responsable 1 +CI 1 +▁sélection 1 +▁Co 1 +▁version 1 +▁Inc 1 +▁faciliter 1 +▁relatifs 1 +▁stratégique 1 +ét 1 +▁succès 1 +objectif 1 +▁Déclaration 1 +▁assez 1 +▁60 1 +avais 1 +▁terrorisme 1 +▁Cependant 1 +▁peine 1 +▁rapidement 1 +▁démocratique 1 +▁annuel 1 +▁présentation 1 +▁Plan 1 +12 1 +▁postes 1 +ali 1 +ives 1 +▁tour 1 +▁portant 1 +▁écrit 1 +▁clients 1 +▁Mar 1 +▁Internet 1 +▁grandes 1 +▁force 1 +▁faible 1 +▁passer 1 +▁Membres 1 +▁Services 1 +▁Europe 1 +▁Qu 1 +ow 1 +▁surtout 1 +▁29 1 +▁mot 1 +cul 1 +▁régional 1 +▁Article 1 +spect 1 +▁chargé 1 +▁mécanismes 1 +▁financier 1 +no 1 +gé 1 +st 1 +EC 1 +▁veux 1 +cour 1 +▁fl 1 +ens 1 +assistance 1 +▁estime 1 +▁Code 1 +▁unique 1 +enquête 1 +rons 1 +▁trait 1 +▁communautés 1 +▁conclu 1 +▁supplémentaires 1 +▁manque 1 +essai 1 +▁Protocole 1 +lic 1 +▁maison 1 +▁vote 1 +▁chapitre 1 +▁sources 1 +▁chacun 1 +em 1 +▁rapide 1 +échelle 1 +▁caractère 1 +▁U 1 +▁provisoire 1 +▁entreprise 1 +▁meilleure 1 +▁médias 1 +14 1 +ha 1 +▁Car 1 +▁crois 1 +▁prend 1 +alité 1 +▁liés 1 +IT 1 +▁troisième 1 +tent 1 +éri 1 +▁dialogue 1 +▁/ 1 +▁humains 1 +▁chef 1 +▁aura 1 +▁relatif 1 +▁local 1 +▁fr 1 +uit 1 +land 1 +▁◦ 1 +▁importants 1 +▁voyage 1 +▁liées 1 +▁suit 1 +enregistrement 1 +oy 1 +▁Sur 1 +tr 1 +AL 1 +intérieur 1 +▁Service 1 +▁bas 1 +▁Chine 1 +tribu 1 +absence 1 +up 1 +▁terrain 1 +▁code 1 +Amérique 1 +pé 1 +isse 1 +avait 1 +▁coup 1 +▁possibilités 1 +▁fonctionnement 1 +▁dés 1 +▁instruments 1 +▁secrétariat 1 +ern 1 +▁contributions 1 +▁1, 1 +▁ait 1 +étais 1 +nelle 1 +▁VIH 1 +▁ET 1 +▁responsables 1 +▁conflit 1 +▁prévoit 1 +▁présence 1 +▁conflits 1 +▁LA 1 +ire 1 +▁milliards 1 +lant 1 +25 1 +▁méthode 1 +ly 1 +tri 1 +▁Quand 1 +▁Ministre 1 +▁ministères 1 +améliorer 1 +ris 1 +▁visite 1 +▁port 1 +▁juge 1 
+▁DES 1 +iser 1 +▁organes 1 +▁avantages 1 +▁vise 1 +vers 1 +▁intérieur 1 +tiques 1 +▁êtes 1 +▁libre 1 +nous 1 +oul 1 +entrée 1 +▁espèces 1 +avenir 1 +histoire 1 +▁réforme 1 +ché 1 +ments 1 +▁particulière 1 +▁garantir 1 +.5 1 +ug 1 +▁maladie 1 +▁1998 1 +▁dès 1 +▁intérêts 1 +▁Ex 1 +OR 1 +ière 1 +▁anglais 1 +▁comportement 1 +▁forces 1 +br 1 +40 1 +cher 1 +innovation 1 +▁puisse 1 +▁tiers 1 +âge 1 +▁convient 1 +▁vente 1 +▁clairement 1 +▁évaluation 1 +▁actuelle 1 +▁modifications 1 +▁convention 1 +der 1 +▁délai 1 +▁source 1 +gue 1 +▁formes 1 +act 1 +▁Après 1 +▁nord 1 +18 1 +▁toutefois 1 +▁commune 1 +éd 1 +▁grands 1 +▁actions 1 +▁consultations 1 +▁réalité 1 +▁pi 1 +▁quelle 1 +▁canadiens 1 +▁raisons 1 +nant 1 +mer 1 +▁simplement 1 +▁Mon 1 +▁marque 1 +▁lesquelles 1 +▁types 1 +▁défini 1 +▁Me 1 +▁objectif 1 +▁réfugiés 1 +▁scientifiques 1 +▁Etats 1 +& 1 +▁Ainsi 1 +▁stratégies 1 +nal 1 +espace 1 +nement 1 +â 1 +▁ai 1 +▁représente 1 +▁met 1 +▁2009 1 +▁fonctionnaires 1 +qué 1 +▁physique 1 +▁1. 1 +alinéa 1 +▁servir 1 +ell 1 +▁regard 1 +▁enregistré 1 +▁parents 1 +occasion 1 +▁voudrais 1 +▁maladies 1 +RE 1 +experts 1 +▁relativement 1 +▁publiques 1 +▁prévu 1 +▁route 1 +▁obtenu 1 +▁The 1 +▁Merci 1 +▁train 1 +ative 1 +19 1 +prim 1 +▁Vi 1 +▁émissions 1 +ies 1 +▁plein 1 +ni 1 +des 1 +inter 1 +▁com 1 +▁choisi 1 +ient 1 +éc 1 +▁quantité 1 +dit 1 +▁quel 1 +ix 1 +▁lettre 1 +▁Haut 1 +▁lumière 1 +ev 1 +▁crédit 1 +08 1 +▁article 1 +ités 1 +▁faites 1 +▁professionnelle 1 +expression 1 +▁lesquels 1 +RC 1 +▁Comment 1 +Q 1 +▁spéciale 1 +▁tableau 1 +▁consultation 1 +▁négociations 1 +idée 1 +▁collectivités 1 +▁sub 1 +AP 1 +ler 1 +ade 1 +isé 1 +▁(19 1 +hé 1 +▁veut 1 +▁petits 1 +▁priorité 1 +ène 1 +▁interne 1 +16 1 +▁inc 1 +▁examiner 1 +ron 1 +▁majorité 1 +▁Depuis 1 +▁centres 1 +nu 1 +▁locaux 1 +▁militaire 1 +▁jouer 1 +▁phase 1 +▁corps 1 +amélioration 1 +▁utilisés 1 +▁établissements 1 +▁confiance 1 +▁consommation 1 +édi 1 +▁Direction 1 +▁populations 1 +ifié 1 +▁Amendement 1 +▁concurrence 1 +ous 1 +euses 1 +▁radio 1 +▁liens 1 +▁petite 1 +ast 1 +éré 1 +▁consiste 1 +▁Nouvelle 1 +▁Elles 1 +▁établir 1 +▁observations 1 +▁▪ 1 +ka 1 +▁atteint 1 +▁juridiques 1 +ak 1 +▁taille 1 +bo 1 +onne 1 +rais 1 +▁signifie 1 +▁statistiques 1 +▁tr 1 +▁accords 1 +60 1 +out 1 +ind 1 +-2 1 +▁champ 1 +▁suivre 1 +▁militaires 1 +▁Or 1 +sida 1 +▁pr 1 +SE 1 +applique 1 +▁conclusions 1 +▁aller 1 +@ 1 +IR 1 +lin 1 +▁multi 1 +▁Royaume 1 +argent 1 +▁savez 1 +▁candidats 1 +Ouest 1 +non 1 +▁haute 1 +fr 1 +▁province 1 +CN 1 +ort 1 +▁caractéristique 1 +▁lignes 1 +▁organisation 1 +▁petites 1 +équipe 1 +▁An 1 +▁k 1 +▁Afrique 1 +▁Lorsque 1 +▁approuvé 1 +évolution 1 +mp 1 +▁Colombie 1 +▁responsabilités 1 +▁agents 1 +éro 1 +▁déposé 1 +05 1 +ati 1 +▁Communauté 1 +▁pied 1 +tte 1 +uc 1 +/1 1 +▁partage 1 +▁final 1 +▁conçu 1 +▁terres 1 +▁scientifique 1 +teur 1 +▁direct 1 +pt 1 +▁communications 1 +▁campagne 1 +ich 1 +▁prévues 1 +▁www 1 +▁1995 1 +▁parti 1 +▁meilleur 1 +▁globale 1 +échange 1 +▁pla 1 +ép 1 +▁forte 1 +▁conséquences 1 +ème 1 +▁concept 1 +ke 1 +▁Sous 1 +▁col 1 +▁entendu 1 +ssant 1 +03 1 +▁figure 1 +▁suivante 1 +▁régionaux 1 +▁pouvait 1 +▁régionales 1 +▁solutions 1 +▁développer 1 +▁sites 1 +▁transports 1 +▁catégorie 1 +▁traite 1 +▁veiller 1 +▁cor 1 +▁peuple 1 +▁familles 1 +▁humaine 1 +▁Date 1 +aine 1 +▁généralement 1 +inc 1 +iez 1 +▁télé 1 +▁licence 1 +▁portée 1 +▁capital 1 +▁sociales 1 +▁France 1 +▁annexe 1 +▁or 1 +▁siècle 1 +▁mère 1 +AS 1 +▁etc 1 +mb 1 +ts 1 +▁terre 1 +ong 1 +▁photo 1 +▁longue 1 +▁modification 1 +apprentissage 1 +▁presque 1 +17 1 +▁action 1 +▁élément 
1 +▁semaine 1 +▁conseils 1 +▁connaissance 1 +▁pénale 1 +▁Document 1 +CC 1 +Britannique 1 +eu 1 +yl 1 +29 1 +▁importantes 1 +▁reconnu 1 +▁actes 1 +▁complète 1 +▁Terre 1 +▁fil 1 +▁organisé 1 +▁activité 1 +▁réaliser 1 +▁mécanisme 1 +01 1 +AD 1 +▁profit 1 +▁trouvé 1 +▁poursuivre 1 +▁man 1 +▁cerveau 1 +▁pleinement 1 +▁partenariat 1 +▁locales 1 +▁̈ 1 +▁spécifiques 1 +▁2004, 1 +▁1997 1 +▁analyse 1 +▁résultat 1 +tal 1 +▁& 1 +ban 1 +erie 1 +élimination 1 +▁augmentation 1 +! 1 +▁composé 1 +▁fort 1 +▁mar 1 +▁idée 1 +▁expérience 1 +▁2, 1 +ces 1 +▁décrit 1 +▁uniquement 1 +▁cons 1 +amp 1 +▁2005, 1 +▁vrai 1 +abilité 1 +▁atteindre 1 +▁récemment 1 +▁micro 1 +app 1 +▁recommande 1 +▁tri 1 +▁médicaments 1 +▁provenant 1 +ien 1 +▁peuples 1 +▁parc 1 +don 1 +pp 1 +▁distribution 1 +ach 1 +▁dispose 1 +▁recherches 1 +▁consommateurs 1 +▁Ar 1 +▁Fa 1 +21 1 +TE 1 +▁York 1 +▁nucléaires 1 +▁Russie 1 +impôt 1 +▁2003, 1 +TION 1 +ttes 1 +▁élaboré 1 +▁ouvert 1 +gén 1 +▁dépend 1 +▁quelqu 1 +▁ren 1 +73 1 +▁vis 1 +96 1 +▁changer 1 +attention 1 +▁enfant 1 +\ 1 +▁logement 1 +▁chemin 1 +▁souhaite 1 +▁communiquer 1 +lect 1 +ana 1 +▁compétence 1 +lon 1 +▁dû 1 +ult 1 +▁prêt 1 +▁montre 1 +▁réseaux 1 +▁90 1 +he 1 +tim 1 +▁revenus 1 +▁Banque 1 +▁importe 1 +iz 1 +initiative 1 +▁intellectuelle 1 +vé 1 +▁nations 1 +▁retour 1 +▁Tous 1 +▁seraient 1 +▁doute 1 +▁bo 1 +▁raisonnable 1 +▁bio 1 +ques 1 +ération 1 +ologie 1 +rou 1 +▁défis 1 +▁environnement 1 +▁naturelles 1 +". 1 +▁publication 1 +▁Mission 1 +utiliser 1 +▁emploi 1 +po 1 +can 1 +▁gros 1 +▁relation 1 +67 1 +tur 1 +▁humanitaire 1 +▁Charte 1 +▁dépôt 1 +▁2002, 1 +établir 1 +AM 1 +▁-- 1 +▁DU 1 +ines 1 +▁prestations 1 +▁propose 1 +▁limite 1 +▁contraire 1 +cent 1 +24 1 +▁favoriser 1 +wa 1 +▁judiciaire 1 +▁diversité 1 +heure 1 +infrastructure 1 +iff 1 +▁gra 1 +agriculture 1 +▁précise 1 +▁* 1 +isant 1 +▁traités 1 +obtenir 1 +▁ro 1 +nc 1 +▁essentiel 1 +▁véhicules 1 +▁supérieur 1 +tin 1 +▁respecter 1 +▁2000, 1 +▁occasion 1 +cré 1 +▁gr 1 +▁vont 1 +école 1 +▁livre 1 +▁considérée 1 +gr 1 +examiner 1 +▁exp 1 +Accord 1 +uff 1 +▁fi 1 +ju 1 +▁engagements 1 +▁2001, 1 +▁rue 1 +RI 1 +▁situé 1 +▁utile 1 +▁voyez 1 +US 1 +▁trou 1 +men 1 +▁Pré 1 +▁prestation 1 +▁change 1 +▁alimentaire 1 +▁recommandé 1 +▁eaux 1 +▁Que 1 +▁étudiants 1 +mes 1 +▁conception 1 +▁étrangères 1 +▁sud 1 +▁disponible 1 +▁commercial 1 +offre 1 +▁Annexe 1 +ê 1 +▁Bo 1 +▁bureaux 1 +06 1 +dessous 1 +▁difficultés 1 +” 1 +▁lancé 1 +▁informé 1 +▁chercheurs 1 +nés 1 +▁lien 1 +existence 1 +▁recommandation 1 +▁constituent 1 +att 1 +▁Ba 1 +▁continu 1 +enne 1 +▁parle 1 +▁professionnels 1 +▁loin 1 +▁cycle 1 +mis 1 +TM 1 +issement 1 +▁humain 1 +▁calcul 1 +▁environnemental 1 +▁attention 1 +▁appelé 1 +▁exprimé 1 +venu 1 +▁animaux 1 +▁pression 1 +▁devenir 1 +▁règle 1 +▁produire 1 +▁engagé 1 +▁permettra 1 +ouverture 1 +▁outils 1 +▁matières 1 +▁connaître 1 +▁paiement 1 +▁probablement 1 +▁efficaces 1 +eff 1 +▁parfois 1 +oire 1 +ame 1 +▁35 1 +chant 1 +antes 1 +ose 1 +ration 1 +engagement 1 +▁ép 1 +▁appui 1 +rc 1 +▁démocratie 1 +▁aliments 1 +▁contient 1 +atif 1 +installation 1 +▁1994 1 +▁cependant 1 +▁autant 1 +▁Ça 1 +accent 1 +▁2006, 1 +raient 1 +▁mentionné 1 +adresse 1 +▁super 1 +▁histoire 1 +▁déclarations 1 +▁autour 1 +PE 1 +▁substances 1 +rain 1 +▁transition 1 +▁faite 1 +▁permettent 1 +▁français 1 +90 1 +▁réponses 1 +▁filles 1 +lan 1 +▁film 1 +▁“ 1 +étranger 1 +▁ta 1 +voy 1 +év 1 +▁principales 1 +▁commencer 1 +▁Ad 1 +▁totale 1 +▁prochaine 1 +▁court 1 +▁traiter 1 +▁joue 1 +▁80 1 +pro 1 +▁principalement 1 +▁Division 1 +▁venir 1 +▁investissements 1 +▁élections 
1 +▁porter 1 +▁Ca 1 +▁peux 1 +4) 1 +▁préparation 1 +▁Ottawa 1 +▁post 1 +▁pouvoirs 1 +▁modèles 1 +▁devait 1 +▁all 1 +▁Li 1 +▁surface 1 +▁souligné 1 +▁installations 1 +▁diffusion 1 +▁considération 1 +ui 1 +ori 1 +▁Enfin 1 +▁formulaire 1 +▁annuelle 1 +▁ca 1 +▁Projet 1 +lig 1 +▁missions 1 +▁so 1 +▁téléphone 1 +▁directives 1 +▁dix 1 +▁carte 1 +▁cha 1 +log 1 +▁fourni 1 +▁indique 1 +ov 1 +▁vidéo 1 +09 1 +▁officielles 1 +alisation 1 +lit 1 +▁considère 1 +▁modifier 1 +▁provinces 1 +€ 1 +▁ba 1 +▁Département 1 +cl 1 +approbation 1 +ît 1 +▁mots 1 +▁étrangers 1 +ATION 1 +▁tra 1 +accueil 1 +▁comporte 1 +▁dossier 1 +agissant 1 +▁puissent 1 +▁Cet 1 +▁sou 1 +▁correspondant 1 +ures 1 +* 1 +▁science 1 +ical 1 +▁vision 1 +▁montrer 1 +ty 1 +23 1 +TA 1 +▁courant 1 +▁w 1 +uis 1 +▁danger 1 +▁ob 1 +naire 1 +▁siège 1 +mon 1 +là 1 +▁test 1 +▁App 1 +▁connu 1 +▁évaluer 1 +orientation 1 +tic 1 +78 1 +ô 1 +▁vivant 1 +▁comptes 1 +▁menace 1 +▁commerciales 1 +▁semaines 1 +22 1 +ages 1 +approche 1 +▁vaste 1 +uv 1 +– 1 +▁1996 1 +▁utilisation 1 +02 1 +effet 1 +OU 1 +▁Saint 1 +66 1 +▁urbain 1 +(1) 1 +▁minutes 1 +▁enquête 1 +▁Com 1 +▁communiqué 1 +▁mode 1 +▁exige 1 +▁communautaires 1 +vo 1 +vent 1 +▁compter 1 +▁penser 1 +cept 1 +▁proportion 1 +Institut 1 +mat 1 +ule 1 +▁CE 1 +▁Partie 1 +▁= 1 +impact 1 +ja 1 +PR 1 +▁mêmes 1 +▁maintenir 1 +arrêt 1 +▁sais 1 +▁cellules 1 +26 1 +ba 1 +itu 1 +chi 1 +issant 1 +▁compar 1 +▁500 1 +esprit 1 +so 1 +▁limites 1 +▁Est 1 +▁habit 1 +▁fais 1 +ari 1 +▁Accueil 1 +fi 1 +▁satisfaction 1 +▁préalable 1 +én 1 +▁soutenir 1 +ifs 1 +Add 1 +▁signé 1 +▁1990 1 +▁circonstances 1 +▁commis 1 +hy 1 +▁cré 1 +▁bou 1 +▁villes 1 +▁intervenants 1 +▁45 1 +▁fo 1 +rom 1 +▁désir 1 +▁presse 1 +▁règlements 1 +nov 1 +car 1 +▁rempli 1 +▁Ne 1 +si 1 +▁régionale 1 +imi 1 +ats 1 +obligation 1 +▁candidat 1 +cur 1 +Z 1 +uel 1 +▁Représentant 1 +▁jeu 1 +▁demeure 1 +▁permanente 1 +▁ét 1 +▁fondée 1 +ert 1 +▁considérable 1 +▁né 1 +▁sexuelle 1 +▁sept 1 +tage 1 +▁facile 1 +▁cancer 1 +▁convenu 1 +née 1 +▁volonté 1 +▁Gu 1 +▁Certains 1 +ième 1 +lle 1 +▁guide 1 +EM 1 +▁drogues 1 +▁John 1 +▁scolaire 1 +▁Plus 1 +avons 1 +▁réuni 1 +▁Traité 1 +▁partout 1 +." 1 +▁Na 1 +▁territoires 1 +▁départ 1 +▁patients 1 +▁cour 1 +27 1 +ah 1 +▁Notre 1 +▁conséquence 1 +▁complet 1 +▁prévue 1 +émission 1 +▁réel 1 +▁sport 1 +ity 1 +▁stabilité 1 +augmentation 1 +enfants 1 +▁fondamentaux 1 +pr 1 +▁précis 1 +RS 1 +ani 1 +DE 1 +▁Voir 1 +pi 1 +▁mener 1 +ition 1 +▁Point 1 +▁offert 1 +▁Ra 1 +31 1 +▁stratégiques 1 +ud 1 +tar 1 +iens 1 +▁précédent 1 +▁délégations 1 +équipement 1 +▁défi 1 +cr 1 +tor 1 +▁modifié 1 +▁réaction 1 +ran 1 +▁privée 1 +▁arrive 1 +▁voix 1 +▁III 1 +▁critique 1 +armes 1 +moi 1 +LE 1 +▁seconde 1 +▁vert 1 +resse 1 +▁réussi 1 +ice 1 +▁tribunal 1 +04 1 +▁Dr 1 +▁commentaires 1 +gouvernemental 1 +▁actuel 1 +▁adressée 1 +SC 1 +PC 1 +▁experts 1 +▁désarmement 1 +▁ceci 1 +▁déchets 1 +▁bal 1 +▁offrir 1 +▁devenu 1 +table 1 +▁garde 1 +ole 1 +▁révision 1 +▁ministres 1 +▁autorisé 1 +.6 1 +igné 1 +▁entier 1 +▁Directeur 1 +▁principale 1 +▁adoptée 1 +▁intégrée 1 +▁Ni 1 +07 1 +> 1 +▁poursuite 1 +». 
1 +▁tendance 1 +aï 1 +ieux 1 +▁obligatoire 1 +▁%) 1 +▁volume 1 +▁Non 1 +tif 1 +ick 1 +-3 1 +▁chimiques 1 +impression 1 +▁clés 1 +▁fut 1 +affaires 1 +75 1 +informations 1 +▁fabrication 1 +72 1 +▁augmenté 1 +▁vient 1 +▁pan 1 +▁bois 1 +été 1 +arm 1 +appliquer 1 +téri 1 +▁administratives 1 +▁nucléaire 1 +▁pourcentage 1 +sc 1 +▁passage 1 +▁soutenu 1 +▁degré 1 +▁requérant 1 +mand 1 +rant 1 +SA 1 +▁secondaire 1 +▁intitulé 1 +teurs 1 +▁information 1 +▁Son 1 +▁bord 1 +▁Chaque 1 +duc 1 +ifi 1 +▁nommé 1 +▁devez 1 +▁tête 1 +sp 1 +TI 1 +élection 1 +▁prévenir 1 +ano 1 +▁observé 1 +▁active 1 +▁génération 1 +intervention 1 +▁partenariats 1 +uf 1 +▁Même 1 +▁ordinaire 1 +asse 1 +IM 1 +▁gouvernance 1 +▁2. 1 +▁participé 1 +ian 1 +▁précédente 1 +▁écoles 1 +▁conformité 1 +▁70 1 +▁utilisées 1 +▁Réunion 1 +▁intégré 1 +68 1 +▁premiers 1 +▁ouverte 1 +▁recevoir 1 +▁nomination 1 +▁200 1 +83 1 +▁derniers 1 +▁génétique 1 +▁idées 1 +▁potentiel 1 +ns 1 +autorité 1 +cri 1 +▁longtemps 1 +", 1 +▁commence 1 +▁auteurs 1 +ase 1 +▁appelle 1 +▁fourniture 1 +▁milliers 1 +▁st 1 +Université 1 +▁ordre 1 +▁catégories 1 +▁directrices 1 +avant 1 +▁chargée 1 +rit 1 +cadre 1 +▁organisée 1 +▁Avec 1 +▁trafic 1 +▁Pacte 1 +76 1 +bl 1 +▁CO 1 +▁particuliers 1 +▁sciences 1 +28 1 +▁résoudre 1 +▁Constitution 1 +▁Rapporteur 1 +▁pertinentes 1 +▁journal 1 +▁Maintenant 1 +▁apporter 1 +▁civils 1 +79 1 +▁organisme 1 +▁intérêt 1 +▁différence 1 +CR 1 +▁faits 1 +▁travaille 1 +▁donnée 1 +aider 1 +▁planète 1 +gn 1 +ente 1 +activités 1 +▁Japon 1 +▁détail 1 +ST 1 +▁présentée 1 +ip 1 +▁personnels 1 +▁favorable 1 +▁figurant 1 +▁moitié 1 +▁équipe 1 +ches 1 +▁suffisamment 1 +▁bande 1 +▁déterminé 1 +▁conservation 1 +▁International 1 +▁Do 1 +▁copie 1 +▁limitée 1 +habitat 1 +▁lieux 1 +▁sexe 1 +ject 1 +▁utilisant 1 +▁grave 1 +▁logiciel 1 +▁collecte 1 +OMPI 1 +▁financé 1 +▁véhicule 1 +ial 1 +80 1 +ête 1 +agent 1 +▁protocole 1 +▁commissaire 1 +▁accepté 1 +▁crime 1 +IP 1 +importe 1 +▁importance 1 +adhésion 1 +▁retard 1 +▁connexes 1 +▁pénal 1 +▁mi 1 +▁Ontario 1 +▁RE 1 +▁conduite 1 +▁is 1 +71 1 +▁− 1 +▁contra 1 +lar 1 +▁commerciaux 1 +▁rencontre 1 +imp 1 +NE 1 +▁clair 1 +92 1 +▁conclusion 1 +▁perte 1 +▁éco 1 +étaient 1 +cle 1 +33 1 +▁tient 1 +▁am 1 +ache 1 +▁extraordinaire 1 +▁supplémentaire 1 +▁numérique 1 +▁climat 1 +▁considéré 1 +vie 1 +trait 1 +▁(1 1 +▁équitable 1 +▁CN 1 +▁table 1 +▁estimé 1 +▁conforme 1 +ât 1 +▁sexes 1 +▁généraux 1 +▁exercice 1 +avocat 1 +DI 1 +vit 1 +35 1 +IE 1 +▁Ho 1 +inspection 1 +▁solide 1 +▁réalisé 1 +▁Ac 1 +▁message 1 +▁contenant 1 +dis 1 +▁collective 1 +▁Cl 1 +▁tâches 1 +▁laboratoire 1 +▁2007, 1 +74 1 +▁initiative 1 +▁tirer 1 +▁géographique 1 +clu 1 +ami 1 +▁annoncé 1 +▁mari 1 +97 1 +▁temporaire 1 +▁appliquer 1 +▁moteur 1 +▁température 1 +▁procédé 1 +▁résolutions 1 +▁périodique 1 +achat 1 +▁fournis 1 +▁inscrit 1 +In 1 +▁Be 1 +▁3, 1 +PS 1 +mêmes 1 +▁tandis 1 +sol 1 +▁élevée 1 +affectation 1 +▁He 1 +▁apporté 1 +▁bout 1 +ID 1 +atrice 1 +ko 1 +▁33 1 +▁représentent 1 +▁Jo 1 +ber 1 +▁ri 1 +▁applicables 1 +▁fournit 1 +▁maritime 1 +▁vivre 1 +▁agricole 1 +SP 1 +ères 1 +▁puisque 1 +▁couleur 1 +▁graves 1 +▁devra 1 +▁employé 1 +▁Du 1 +CP 1 +▁Toronto 1 +▁auxquels 1 +▁musique 1 +▁Premières 1 +▁primaire 1 +▁définir 1 +▁Conformément 1 +existe 1 +91 1 +▁Autres 1 +▁... 
1 +▁mouvement 1 +▁parole 1 +▁utilise 1 +▁indicateurs 1 +70 1 +99 1 +▁Pas 1 +▁sé 1 +▁procéder 1 +▁parvenir 1 +logue 1 +▁commerciale 1 +espèce 1 +▁utilisateurs 1 +observation 1 +ium 1 +▁Note 1 +▁intéressant 1 +ore 1 +▁sauf 1 +▁Mc 1 +▁préoccupations 1 +part 1 +▁concours 1 +issue 1 +▁Per 1 +▁modalités 1 +▁So 1 +▁susceptibles 1 +▁énergétique 1 +▁pilote 1 +▁tiré 1 +ssé 1 +▁montré 1 +pprovisionnement 1 +viennent 1 +▁station 1 +▁ordinateur 1 +Est 1 +▁EN 1 +▁cherche 1 +▁inclus 1 +ox 1 +▁appris 1 +▁Mi 1 +études 1 +oir 1 +▁global 1 +PP 1 +▁chefs 1 +lier 1 +▁somme 1 +▁soi 1 +▁Air 1 +adi 1 +ik 1 +▁payé 1 +cra 1 +▁moderne 1 +▁circulation 1 +▁Pa 1 +▁rejet 1 +▁exposé 1 +ières 1 +era 1 +▁réalisés 1 +œ 1 +lation 1 +▁Sommet 1 +OP 1 +▁Fédération 1 +▁reconnaissance 1 +▁producteurs 1 +EUR 1 +94 1 +œuvre 1 +▁jugé 1 +▁fixé 1 +rage 1 +▁demandeur 1 +▁pourra 1 +93 1 +▁fournisseurs 1 +▁Section 1 +▁transparence 1 +▁voulu 1 +forme 1 +▁mou 1 +col 1 +delà 1 +000 1 +▁salaire 1 +▁prêts 1 +▁répond 1 +▁55 1 +▁pouvant 1 +▁situations 1 +▁Po 1 +ock 1 +▁nu 1 +ama 1 +▁Ou 1 +▁immédiatement 1 +q 1 +▁Montréal 1 +let 1 +▁description 1 +▁réelle 1 +▁Ju 1 +▁at 1 +▁su 1 +▁venu 1 +▁double 1 +▁1993 1 +isés 1 +▁familiale 1 +▁consacré 1 +▁conseiller 1 +exportation 1 +▁journée 1 +extérieur 1 +eaux 1 +▁supérieure 1 +su 1 +▁locale 1 +▁format 1 +cé 1 +▁ressort 1 +▁pauvres 1 +▁allé 1 +▁Col 1 +▁rémunération 1 +aille 1 +▁utilisée 1 +▁relève 1 +▁aérien 1 +▁gl 1 +▁envoyé 1 +▁central 1 +▁élèves 1 +▁32 1 +▁procès 1 +fo 1 +▁mené 1 +bon 1 +▁adapté 1 +EL 1 +▁souligne 1 +ôt 1 +▁can 1 +▁Genève 1 +▁sensibilisation 1 +▁désigné 1 +▁prochain 1 +tour 1 +▁Su 1 +▁feu 1 +▁affecté 1 +image 1 +▁futur 1 +ète 1 +▁Pourquoi 1 +érant 1 +▁LE 1 +gar 1 +▁structures 1 +oli 1 +tré 1 +atoire 1 +▁agricoles 1 +▁pièces 1 +▁fédéraux 1 +institution 1 +▁Bon 1 +▁placé 1 +▁comprennent 1 +▁commande 1 +▁auquel 1 +off 1 +▁impact 1 +▁participant 1 +tel 1 +affaire 1 +▁baisse 1 +▁européens 1 +▁Parce 1 +▁dossiers 1 +▁art 1 +▁conf 1 +▁événements 1 +emp 1 +-20 1 +▁poids 1 +▁vingt 1 +▁biais 1 +han 1 +▁assuré 1 +▁fondé 1 +▁plainte 1 +▁répartition 1 +▁retraite 1 +▁Quel 1 +ral 1 +▁invité 1 +▁paragraphes 1 +▁détention 1 +41 1 +association 1 +gre 1 +▁positive 1 +▁illicite 1 +▁torture 1 +▁not 1 +alimentation 1 +cer 1 +▁éviter 1 +▁vit 1 +▁appr 1 +▁collectivité 1 +▁essentiellement 1 +dition 1 +issent 1 +▁Avant 1 +▁proche 1 +isée 1 +▁discussions 1 +▁papier 1 +amb 1 +nis 1 +inscription 1 +▁instance 1 +▁consulter 1 +▁conférences 1 +environ 1 +▁nuit 1 +▁hors 1 +▁certificat 1 +▁certaine 1 +▁Afin 1 +amendement 1 +spir 1 +naires 1 +▁recettes 1 +▁Bar 1 +▁concert 1 +69 1 +▁Demande 1 +atifs 1 +▁effectué 1 +▁1992 1 +acte 1 +▁< 1 +ME 1 +vre 1 +▁CA 1 +▁gérer 1 +ley 1 +▁(1) 1 +▁élaborer 1 +▁exécutif 1 +▁savons 1 +ett 1 +bre 1 +▁2010 1 +fa 1 +▁endroit 1 +▁acte 1 +dia 1 +▁violation 1 +▁encourage 1 +tru 1 +▁complexe 1 +ym 1 +45 1 +▁marche 1 +Orient 1 +▁Pr 1 +▁dessin 1 +▁payer 1 +▁Tableau 1 +▁Compte 1 +▁sent 1 +▁support 1 +auto 1 +▁compréhension 1 +▁tâche 1 +▁reprise 1 +▁industrielle 1 +▁menées 1 +IF 1 +▁sujets 1 +▁vi 1 +86 1 +▁discussion 1 +87 1 +ath 1 +-4 1 +organisme 1 +▁Pays 1 +89 1 +▁congé 1 +ET 1 +▁instrument 1 +▁rester 1 +▁populaire 1 +▁différences 1 +▁faudrait 1 +autorisation 1 +▁historique 1 +nn 1 +appliquent 1 +77 1 +las 1 +▁tribunaux 1 +▁brut 1 +▁ferme 1 +▁maximum 1 +▁adjoint 1 +▁Description 1 +▁construire 1 +▁Nouveau 1 +Association 1 +▁voici 1 +ack 1 +employeur 1 +▁générales 1 +UR 1 +audience 1 +▁For 1 +▁minimum 1 +NA 1 +», 1 +appareil 1 +▁engagement 1 +jo 1 +az 1 +nent 1 +▁félicite 1 +▁arrêt 1 +▁signature 1 
+interdiction 1 +ans 1 +▁cinquante 1 +▁propriétaire 1 +▁réduit 1 +▁présidence 1 +▁accrue 1 +▁correspond 1 +▁allons 1 +IA 1 +cal 1 +▁chacune 1 +van 1 +▁cap 1 +net 1 +▁inst 1 +▁chiffres 1 +▁Numéro 1 +▁cher 1 +▁bénéficier 1 +▁additionnel 1 +▁vitesse 1 +▁rencontré 1 +▁camp 1 +▁neuf 1 +▁communes 1 +▁lutter 1 +employé 1 +▁Institut 1 +opération 1 +ues 1 +▁destinés 1 +▁nouvel 1 +▁extrêmement 1 +▁posé 1 +▁taxe 1 +▁distance 1 +▁occupé 1 +▁Ha 1 +ndre 1 +▁présentés 1 +▁premières 1 +▁trouvent 1 +▁opérationnel 1 +▁palestinien 1 +exposition 1 +▁mises 1 +▁étape 1 +▁aient 1 +RA 1 +usage 1 +▁conjoint 1 +fect 1 +▁acc 1 +▁adoptées 1 +▁minorités 1 +▁désormais 1 +▁meilleurs 1 +▁bonnes 1 +▁véritable 1 +asp 1 +34 1 +▁père 1 +rique 1 +▁motifs 1 +▁Aux 1 +▁cible 1 +▁Ceci 1 +▁signalé 1 +▁enquêtes 1 +▁perdu 1 +▁largement 1 +main 1 +▁assistance 1 +▁noté 1 +▁spéciales 1 +ili 1 +▁obstacles 1 +Initiative 1 +avance 1 +▁représentation 1 +▁initiale 1 +▁concernés 1 +▁facteur 1 +▁chaîne 1 +▁adopter 1 +▁norme 1 +impose 1 +84 1 +▁préparer 1 +▁associés 1 +37 1 +▁constaté 1 +vision 1 +fic 1 +▁Am 1 +programme 1 +▁pre 1 +NU 1 +▁détaillée 1 +▁traitements 1 +▁rà 1 +▁Oui 1 +51 1 +acquisition 1 +▁rapporteur 1 +▁entend 1 +▁Société 1 +▁huit 1 +▁yeux 1 +▁violations 1 +▁stade 1 +▁crédits 1 +▁encourager 1 +▁Avis 1 +▁erreur 1 +▁accroître 1 +▁devient 1 +mul 1 +▁homme 1 +5) 1 +▁espace 1 +▁auront 1 +▁Cor 1 +44 1 +▁remboursement 1 +▁vaccin 1 +▁qualifié 1 +▁Th 1 +▁dirigeants 1 +▁culturels 1 +pens 1 +ifier 1 +▁bar 1 +▁objet 1 +▁Forum 1 +▁délais 1 +▁religion 1 +avion 1 +49 1 +▁chambres 1 +▁gouvernementaux 1 +▁Trésor 1 +▁sûreté 1 +▁3. 1 +▁collègues 1 +▁em 1 +▁auto 1 +▁thème 1 +▁subi 1 +▁To 1 +32 1 +▁rep 1 +▁climatique 1 +▁applicable 1 +▁forêts 1 +exécut 1 +IL 1 +▁assure 1 +▁frontières 1 +og 1 +▁Chambre 1 +▁34 1 +▁naissance 1 +OL 1 +▁composition 1 +ature 1 +rent 1 +▁envers 1 +43 1 +▁répercussions 1 +hôtel 1 +‐ 1 +▁échanges 1 +▁américaine 1 +gin 1 +engage 1 +els 1 +lis 1 +Bas 1 +▁Affaires 1 +fe 1 +gent 1 +39 1 +▁1991 1 +cy 1 +▁fixe 1 +▁faisons 1 +▁efficacement 1 +mique 1 +inte 1 +parle 1 +▁causé 1 +CO 1 +ric 1 +▁dynamique 1 +▁1999, 1 +▁Port 1 +▁avancé 1 +▁hausse 1 +tan 1 +2) 1 +▁voulons 1 +▁signe 1 +▁IN 1 +▁essais 1 +▁cro 1 +met 1 +gen 1 +évaluer 1 +▁évalué 1 +CA 1 +hér 1 +mètre 1 +▁exclu 1 +▁Toutes 1 +▁démarche 1 +▁amis 1 +▁acteurs 1 +38 1 +we 1 +attache 1 +▁Stratégie 1 +sur 1 +DC 1 +▁appuyer 1 +▁transformation 1 +▁spécifique 1 +▁construit 1 +oll 1 +actif 1 +▁masse 1 +▁chance 1 +éthique 1 +▁ver 1 +▁min 1 +SI 1 +▁carrière 1 +▁déplacement 1 +▁Ab 1 +▁Politique 1 +▁Sh 1 +▁antérieure 1 +PA 1 +▁village 1 +▁africaine 1 +èrent 1 +encontre 1 +▁quatrième 1 +UN 1 +indication 1 +avaient 1 +▁mod 1 +▁textes 1 +▁accordée 1 +ust 1 +▁COM 1 +▁36 1 +▁choisir 1 +phy 1 +▁puissance 1 +▁réserves 1 +intéresse 1 +▁cœur 1 +▁naturel 1 +▁feuille 1 +OC 1 +▁Puis 1 +▁Ri 1 +▁législatif 1 +▁écologique 1 +vin 1 +▁formule 1 +▁déb 1 +embl 1 +Iraq 1 +95 1 +▁entièrement 1 +▁incroyable 1 +▁finale 1 +▁extérieur 1 +CS 1 +ki 1 +ata 1 +▁datée 1 +assi 1 +36 1 +▁réf 1 +-19 1 +▁Amérique 1 +▁arrêté 1 +▁cultures 1 +ED 1 +▁contractant 1 +▁aff 1 +▁servi 1 +▁accordé 1 +▁mont 1 +▁contrats 1 +UL 1 +▁performance 1 +▁télévision 1 +ham 1 +▁internes 1 +▁2005. 
1 +▁garantie 1 +▁détails 1 +▁refus 1 +▁garanti 1 +▁Pe 1 +▁interdit 1 +oi 1 +▁km 1 +unité 1 +▁échéant 1 +▁bi 1 +IV 1 +▁atteinte 1 +▁indispensable 1 +▁phénomène 1 +▁Moyen 1 +uré 1 +▁limité 1 +▁chambre 1 +▁formé 1 +élaborer 1 +▁prévus 1 +▁mérite 1 +▁perspective 1 +▁attend 1 +▁sait 1 +▁régulièrement 1 +▁opinion 1 +mm 1 +▁complètement 1 +▁médicaux 1 +▁pension 1 +▁régler 1 +▁invite 1 +▁PE 1 +▁représenté 1 +▁tenant 1 +lais 1 +▁terminé 1 +▁subventions 1 +▁extérieure 1 +ica 1 +▁voiture 1 +immigration 1 +▁étudié 1 +EMENT 1 +rer 1 +▁spéciaux 1 +▁avions 1 +▁vivent 1 +stat 1 +▁adultes 1 +▁séjour 1 +▁75 1 +▁registre 1 +▁ressemble 1 +TS 1 +taire 1 +▁patrimoine 1 +▁électroniques 1 +▁Pendant 1 +▁blanc 1 +ld 1 +▁compagnie 1 +▁augmenter 1 +interprétation 1 +▁brevets 1 +▁budgétaires 1 +▁usage 1 +▁directe 1 +CT 1 +▁pertinents 1 +▁provinciaux 1 +▁destruction 1 +▁fum 1 +▁réception 1 +▁préliminaire 1 +▁pourront 1 +▁travaillent 1 +gro 1 +▁Lorsqu 1 +▁coll 1 +▁Di 1 +duit 1 +▁image 1 +cial 1 +▁sommet 1 +tà 1 +▁développé 1 +.7 1 +64 1 +indépendance 1 +▁Communication 1 +▁Premier 1 +▁conscience 1 +▁établie 1 +▁veulent 1 +enfance 1 +▁Ressources 1 +▁Mexique 1 +éno 1 +▁Man 1 +▁constituer 1 +▁Q 1 +ros 1 +position 1 +ites 1 +▁Go 1 +Asie 1 +gan 1 +ino 1 +ements 1 +▁soumettre 1 +▁transmis 1 +cha 1 +▁37 1 +EP 1 +ides 1 +▁évident 1 +▁gamme 1 +▁paiements 1 +usine 1 +▁plaintes 1 +▁mauvaise 1 +espoir 1 +▁recrutement 1 +▁meilleures 1 +gie 1 +quant 1 +line 1 +▁détermination 1 +65 1 +aw 1 +▁♫ 1 +▁sanctions 1 +comp 1 +▁aérienne 1 +▁arabe 1 +▁culturelle 1 +just 1 +lli 1 +électricité 1 +ulaire 1 +▁Har 1 +▁contribué 1 +abri 1 +▁linguistique 1 +nique 1 +excellent 1 +▁+ 1 +▁rappelle 1 +▁partager 1 +berg 1 +▁mauvais 1 +6) 1 +▁ONG 1 +▁couvert 1 +embr 1 +bit 1 +grad 1 +▁brevet 1 +hor 1 +▁Grand 1 +▁Première 1 +▁appropriées 1 +82 1 +▁Israël 1 +54 1 +85 1 +▁Ga 1 +▁2004. 
1 +nat 1 +roule 1 +▁décennie 1 +▁endroits 1 +▁All 1 +▁nation 1 +amine 1 +▁voulez 1 +Irlande 1 +á 1 +▁fera 1 +▁Comp 1 +identité 1 +▁élevés 1 +▁Corée 1 +voi 1 +88 1 +▁client 1 +nom 1 +▁malgré 1 +48 1 +▁vérifier 1 +47 1 +▁présentées 1 +▁officielle 1 +▁climatiques 1 +▁voit 1 +▁prison 1 +▁peur 1 +pol 1 +▁Bu 1 +▁jury 1 +▁biennal 1 +▁Nombre 1 +tée 1 +▁rang 1 +▁effectivement 1 +gel 1 +▁riche 1 +▁convaincu 1 +▁changé 1 +élément 1 +▁prennent 1 +▁consensus 1 +-10 1 +venant 1 +▁corruption 1 +▁lit 1 +▁th 1 +▁Pre 1 +▁découvert 1 +▁établis 1 +▁rural 1 +CH 1 +1, 1 +▁mentale 1 +dite 1 +▁Décision 1 +ave 1 +▁arrivé 1 +har 1 +▁souhait 1 +▁Forces 1 +rog 1 +engager 1 +▁donateurs 1 +▁Pacifique 1 +▁nourriture 1 +▁titulaire 1 +adaptation 1 +▁réalisée 1 +▁tôt 1 +▁individus 1 +▁alimentaires 1 +▁arriver 1 +▁crimes 1 +▁culturelles 1 +▁300 1 +▁Millénaire 1 +▁Parmi 1 +FC 1 +▁associations 1 +▁Manitoba 1 +▁images 1 +▁terroristes 1 +▁documentation 1 +zo 1 +appelle 1 +▁visés 1 +▁pétrole 1 +ost 1 +▁professionnel 1 +2006 1 +▁réellement 1 +opposition 1 +2007 1 +Administration 1 +▁reconnaître 1 +▁préparé 1 +▁facilement 1 +▁électrique 1 +ANT 1 +▁économie 1 +▁visées 1 +▁accident 1 +▁différent 1 +▁rivière 1 +▁Association 1 +nie 1 +toxi 1 +ajout 1 +▁poly 1 +▁ha 1 +▁indépendant 1 +ssez 1 +2005 1 +ster 1 +▁cou 1 +ville 1 +▁Gestion 1 +▁sentiment 1 +▁officiel 1 +▁conc 1 +fl 1 +▁(2) 1 +▁budgétaire 1 +▁souligner 1 +▁viol 1 +▁emplois 1 +▁bénéfice 1 +▁pollution 1 +▁auraient 1 +▁Commissaire 1 +▁partis 1 +igu 1 +▁manifeste 1 +▁requis 1 +2, 1 +▁liaison 1 +▁entrepris 1 +intégrité 1 +▁expériences 1 +▁quartier 1 ++ 1 +▁européennes 1 +her 1 +ze 1 +bas 1 +ook 1 +▁pouvaient 1 +▁séparé 1 +55 1 +▁fabricant 1 +▁rurales 1 +▁dispositif 1 +struct 1 +▁professeur 1 +▁million 1 +▁laisser 1 +▁préoccupé 1 +mor 1 +▁accompli 1 +▁semblable 1 +▁Recherche 1 +▁48 1 +▁Tu 1 +▁culturel 1 +uer 1 +▁dette 1 +▁autorisée 1 +▁séances 1 +ouch 1 +▁impossible 1 +▁rouge 1 +▁38 1 +▁pièce 1 +▁carbone 1 +46 1 +▁2003. 
1 +ax 1 +▁conclure 1 +▁indiquer 1 +▁4, 1 +▁foi 1 +▁régi 1 +▁relever 1 +▁répondu 1 +▁outil 1 +▁continent 1 +IG 1 +univers 1 +amour 1 +nées 1 +▁géo 1 +▁Système 1 +▁médicale 1 +▁résumé 1 +ute 1 +▁Ka 1 +▁Exp 1 +sistant 1 +indemnité 1 +▁remplir 1 +▁allant 1 +▁considérer 1 +▁pal 1 +ailleurs 1 +▁vent 1 +▁élu 1 +original 1 +▁possibles 1 +▁tarif 1 +-01 1 +▁PNUD 1 +fin 1 +▁Congo 1 +▁exportations 1 +▁statistique 1 +▁Qui 1 +ott 1 +ble 1 +$ 1 +▁évaluations 1 +▁jeune 1 +▁saisi 1 +▁transmission 1 +▁concentration 1 +▁Information 1 +98 1 +▁Guide 1 +▁cibl 1 +▁gauche 1 +UT 1 +▁pleine 1 +▁porté 1 +▁El 1 +ight 1 +▁division 1 +▁nécessite 1 +ek 1 +▁plénière 1 +▁respectivement 1 +▁notion 1 +GR 1 +▁publications 1 +▁saison 1 +ary 1 +▁criminalité 1 +▁inférieur 1 +ancienne 1 +▁auxquelles 1 +ote 1 +ome 1 +▁juridiction 1 +ann 1 +▁monétaire 1 +▁informatique 1 +RO 1 +▁livres 1 +identification 1 +▁relevant 1 +▁prié 1 +▁Plusieurs 1 +▁Voilà 1 +▁déterminée 1 +▁noter 1 +▁Mont 1 +▁décès 1 +▁Pla 1 +▁tellement 1 +▁constitutionnel 1 +▁Mari 1 +▁exemples 1 +▁mention 1 +▁Aujourd 1 +ina 1 +▁préoccupation 1 +▁fiscale 1 +▁activement 1 +▁scène 1 +ENT 1 +58 1 +emplacement 1 +▁électorale 1 +mit 1 +▁Force 1 +▁axée 1 +▁Sp 1 +▁Turquie 1 +▁étapes 1 +opinion 1 +mie 1 +OMC 1 +MP 1 +▁identifié 1 +▁Sou 1 +▁déployés 1 +▁gestionnaires 1 +▁alinéa 1 +▁42 1 +euro 1 +vention 1 +individu 1 +▁liée 1 +▁augmente 1 +3) 1 +por 1 +céd 1 +▁significative 1 +expliqu 1 +▁libertés 1 +▁44 1 +▁41 1 +▁fondamentales 1 +▁appuie 1 +▁énorme 1 +▁intégral 1 +oud 1 +▁Saskatchewan 1 +za 1 +▁concerné 1 +▁autonome 1 +▁FC 1 +▁crée 1 +écri 1 +▁remise 1 +▁satisfait 1 +▁devront 1 +57 1 +▁affirme 1 +▁char 1 +▁modifiée 1 +▁Personne 1 +▁eau 1 +▁migration 1 +AF 1 +aud 1 +AI 1 +▁raciale 1 +duction 1 +▁capable 1 +▁Tel 1 +tit 1 +▁39 1 +▁hôtel 1 +▁effectuer 1 +▁euro 1 +▁infrastructures 1 +époque 1 +▁précision 1 +▁repose 1 +▁matériaux 1 +tive 1 +WP 1 +▁résidence 1 +59 1 +▁subvention 1 +échelon 1 +▁devaient 1 +Etat 1 +▁prévoir 1 +▁Brésil 1 +▁relevé 1 +▁Total 1 +acc 1 +▁marques 1 +▁financer 1 +▁décider 1 +intermédiaire 1 +▁interventions 1 +42 1 +▁1) 1 +▁forcé 1 +▁utiles 1 +MI 1 +▁différente 1 +63 1 +pre 1 +▁travaillé 1 +▁Ko 1 +ets 1 +▁armés 1 +▁réformes 1 +exception 1 +appelant 1 +ru 1 +▁débats 1 +▁provinciale 1 +aff 1 +▁élargi 1 +▁avantage 1 +▁CC 1 +▁handicapées 1 +arc 1 +prolifération 1 +introduction 1 +▁inf 1 +61 1 +▁appropriée 1 +▁systématique 1 +Allemagne 1 +▁fille 1 +▁étroite 1 +dy 1 +▁garanties 1 +▁naturelle 1 +entretien 1 +acteur 1 +▁observer 1 +▁indépendante 1 +AG 1 +▁2002. 1 +(2) 1 +▁unités 1 +▁heureux 1 +▁individuelle 1 +isées 1 +avez 1 +▁continuent 1 +▁protégé 1 +▁chances 1 +▁comités 1 +▁extra 1 +▁cartes 1 +ico 1 +▁accomplis 1 +uve 1 +occupant 1 +▁distinct 1 +armée 1 +html 1 +▁clé 1 +▁bâtiment 1 +▁obtenus 1 +▁SC 1 +DP 1 +▁assis 1 +attend 1 +gestion 1 +▁pertes 1 +▁constitué 1 +▁profil 1 +▁montrent 1 +▁figurent 1 +▁matin 1 +▁fréquence 1 +▁Droit 1 +rier 1 +▁régimes 1 +▁2006. 
1 +▁reconnaît 1 +▁chinois 1 +Inde 1 +▁actuelles 1 +PI 1 +▁vulnérables 1 +▁civil 1 +▁pose 1 +▁fonctionne 1 +▁options 1 +MA 1 +81 1 +▁poser 1 +exprimer 1 +▁appliqué 1 +élé 1 +▁déclare 1 +instrument 1 +▁diminution 1 +Écosse 1 +▁Sch 1 +▁normale 1 +▁biologique 1 +IQUE 1 +▁bis 1 +▁séminaire 1 +▁net 1 +▁es 1 +▁entités 1 +▁investi 1 +▁cru 1 +▁essentiels 1 +contr 1 +▁aime 1 +▁transporteur 1 +▁médecine 1 +agir 1 +by 1 +humanité 1 +▁publicité 1 +incidence 1 +▁tendances 1 +▁enfin 1 +pul 1 +▁esprit 1 +62 1 +nier 1 +▁échange 1 +▁influence 1 +▁menée 1 +▁lire 1 +▁marqué 1 +▁récent 1 +▁faisait 1 +ographie 1 +▁56 1 +▁banques 1 +den 1 +TC 1 +▁apprendre 1 +iti 1 +▁côte 1 +▁95 1 +▁israélien 1 +▁approprié 1 +Atlantique 1 +Alberta 1 +animal 1 +▁Lors 1 +ther 1 +▁migrants 1 +importation 1 +________________ 1 +▁législatives 1 +mobil 1 +cor 1 +ira 1 +▁étudier 1 +▁fréquent 1 +▁CON 1 +éné 1 +Brunswick 1 +▁ajouté 1 +▁mariage 1 +strat 1 +UNICEF 1 +▁IRSC 1 +imposition 1 +▁machine 1 +▁amendements 1 +▁visée 1 +▁claire 1 +/3 1 +▁annonce 1 +lles 1 +▁2) 1 +▁Statistique 1 +RES 1 +▁versé 1 +1) 1 +▁sensible 1 +▁mines 1 +▁éliminer 1 +entente 1 +▁Paris 1 +▁établissement 1 +▁attribué 1 +▁concentrations 1 +▁mettant 1 +▁noir 1 +bor 1 +▁couche 1 +▁dirigé 1 +En 1 +▁spatiale 1 +▁multilatéral 1 +▁mur 1 +▁2008, 1 +▁réglement 1 +▁x 1 +▁LES 1 +ries 1 +▁manuel 1 +aller 1 +▁envisagé 1 +ling 1 +▁économies 1 +▁âgées 1 +université 1 +▁pages 1 +▁adresse 1 +▁donnent 1 +espère 1 +▁poisson 1 +▁Recommandation 1 +DA 1 +▁poissons 1 +▁Dis 1 +amm 1 +▁touchant 1 +atrices 1 +▁destiné 1 +▁Ver 1 +▁affaire 1 +▁tourisme 1 +▁Table 1 +adopter 1 +▁intéressés 1 +▁proactive 1 +▁suffisante 1 +▁existant 1 +▁partagé 1 +▁volet 1 +▁calendrier 1 +▁Grande 1 +▁restrictions 1 +▁examine 1 +MD 1 +▁combien 1 +avec 1 +ayant 1 +52 1 +▁multiples 1 +▁négociation 1 +▁administratif 1 +▁domin 1 +▁Kosovo 1 +▁Ph 1 +▁combat 1 +56 1 +▁fallait 1 +▁robot 1 +▁Deux 1 +▁lourd 1 +temp 1 +▁spécialistes 1 +bu 1 +▁PA 1 +▁Divulgation 1 +▁proposée 1 +▁située 1 +▁individuel 1 +▁conduit 1 +▁confirmé 1 +environnementaux 1 +▁âge 1 +Internet 1 +▁présentent 1 +▁adéquate 1 +-02 1 +▁puissant 1 +own 1 +▁Paragraphe 1 +▁limiter 1 +▁destinées 1 +gique 1 +▁virus 1 +accroître 1 +▁suffisant 1 +▁Quelle 1 +▁plaignant 1 +▁médecins 1 +▁pacifique 1 +édition 1 +▁concernées 1 +▁retrait 1 +▁Développement 1 +▁décide 1 +honorable 1 +▁complexes 1 +af 1 +▁existants 1 +▁varie 1 +lage 1 +▁revêt 1 +alcool 1 +▁majeure 1 +-03 1 +▁chercher 1 +instruction 1 +matique 1 +▁remercier 1 +▁PRO 1 +▁mène 1 +président 1 +île 1 +▁explique 1 +▁reflète 1 +▁Suisse 1 +▁croire 1 +▁dispens 1 +iennes 1 +▁Toute 1 +▁sert 1 +▁prochaines 1 +▁objets 1 +▁autorisation 1 +▁plu 1 +view 1 +intéressé 1 +▁communs 1 +▁2007. 
1 +▁Vo 1 +▁valoir 1 +▁Autochtones 1 +▁personnelle 1 +MC 1 +▁réussite 1 +▁Act 1 +▁continuera 1 +alis 1 +bar 1 +▁utilisent 1 +▁ministérielle 1 +▁enseignants 1 +▁be 1 +▁Inde 1 +iel 1 +▁parlementaire 1 +ouest 1 +▁introduit 1 +▁incombe 1 +▁85 1 +▁satellite 1 +▁accompagné 1 +▁navires 1 +FI 1 +▁remplacé 1 +▁National 1 +entreprises 1 +▁second 1 +▁réparti 1 +ense 1 +▁mètres 1 +audit 1 +▁photos 1 +▁établies 1 +▁Inter 1 +▁médecin 1 +▁prestataire 1 +▁variation 1 +▁neuro 1 +▁renvoi 1 +▁exactement 1 +▁parlé 1 +▁télécommunicat 1 +bin 1 +▁évolution 1 +▁Dieu 1 +LA 1 +cou 1 +▁tiendra 1 +mission 1 +▁Mal 1 +.9 1 +▁effective 1 +▁classification 1 +▁destination 1 +▁sort 1 +ajustement 1 +▁émis 1 +inscrire 1 +▁récente 1 +venir 1 +í 1 +octroi 1 +▁Ann 1 +▁mini 1 +▁disent 1 +-11 1 +▁admissibles 1 +lie 1 +▁actifs 1 +ps 1 +▁prescrit 1 +pho 1 +▁intitulée 1 +uvre 1 +53 1 +TR 1 +▁Dépenses 1 +▁allez 1 +horizon 1 +▁institution 1 +▁Hu 1 +▁acquis 1 +▁2000. 1 +▁conventions 1 +▁Vancouver 1 +Neuve 1 +QU 1 +▁découlant 1 +▁Salle 1 +▁désigne 1 +▁mémoire 1 +▁conscient 1 +ménage 1 +▁oui 1 +pha 1 +lat 1 +▁plantes 1 +Euro 1 +▁importations 1 +▁1998, 1 +isent 1 +▁énergie 1 +▁centaines 1 +incident 1 +los 1 +▁soulevé 1 +▁Suède 1 +▁sortir 1 +▁intérieure 1 +▁mesurer 1 +gène 1 +▁britannique 1 +.1) 1 +adapter 1 +▁bénéficiaires 1 +▁San 1 +▁NO 1 +▁contrôler 1 +▁défaut 1 +▁bourse 1 +▁dehors 1 +▁pat 1 +euros 1 +▁perspectives 1 +aimerais 1 +▁actuels 1 +▁variable 1 +▁échelle 1 +▁offerts 1 +▁écrite 1 +▁drogue 1 +▁signal 1 +▁dimension 1 +▁contribue 1 +Iran 1 +▁frontière 1 +▁sang 1 +▁Ta 1 +▁(2 1 +▁mondialisation 1 +▁fortement 1 +▁requête 1 +▁incidences 1 +exclusion 1 +▁caractéris 1 +▁Entre 1 +▁dois 1 +▁design 1 +acquitter 1 +▁maximale 1 +▁regarder 1 +▁satisfaire 1 +▁soir 1 +▁Bi 1 +▁expliquer 1 +▁remplacer 1 +fer 1 +▁lac 1 +▁couverture 1 +DR 1 +▁juges 1 +▁jugement 1 +▁2001. 
1 +▁chômage 1 +▁43 1 +▁mineurs 1 +▁Ltd 1 +apport 1 +▁rejeté 1 +▁intention 1 +▁résolu 1 +▁appareils 1 +▁Madame 1 +▁comparaison 1 +▁empêcher 1 +▁légale 1 +▁nourri 1 +LI 1 +cho 1 +▁entrée 1 +mettre 1 +▁états 1 +▁mus 1 +▁effectuée 1 +▁rappeler 1 +▁détaillé 1 +vari 1 +▁administrative 1 +▁soumission 1 +▁américain 1 +▁relié 1 +▁49 1 +nch 1 +▁sondage 1 +ordinateur 1 +▁fils 1 +▁consentement 1 +och 1 +▁terrestre 1 +▁existantes 1 +OS 1 +atives 1 +▁recueilli 1 +▁Gra 1 +▁portent 1 +▁65 1 +Prince 1 +▁quotidienne 1 +▁400 1 +▁banque 1 +▁Lo 1 +▁consacrée 1 +▁tradition 1 +▁substance 1 +ull 1 +▁commissions 1 +opportunité 1 +▁possède 1 +▁montants 1 +▁profonde 1 +océan 1 +▁rem 1 +▁tarifaire 1 +▁Tra 1 +▁patient 1 +lée 1 +aison 1 +▁religieuse 1 +▁joué 1 +fond 1 +▁mortalité 1 +▁devoir 1 +▁remercie 1 +ish 1 +▁notification 1 +▁IV 1 +Le 1 +▁détenus 1 +▁futures 1 +▁instamment 1 +eng 1 +▁précisément 1 +▁Certaines 1 +▁conserver 1 +▁universités 1 +honneur 1 +▁Val 1 +éviter 1 +▁majeur 1 +▁Téléphone 1 +VI 1 +▁dommages 1 +** 1 +index 1 +allocation 1 +équilibre 1 +Étant 1 +itude 1 +▁traditionnels 1 +▁Défense 1 +▁humanitaires 1 +▁similaires 1 +▁54 1 +▁fou 1 +termin 1 +▁préjudice 1 +▁souci 1 +▁flux 1 +hiver 1 +nez 1 +ting 1 +▁facultatif 1 +▁technologique 1 +▁recon 1 +▁compétentes 1 +BC 1 +II 1 +gard 1 +vel 1 +oxy 1 +▁doté 1 +▁ami 1 +équ 1 +arriv 1 +▁web 1 +▁confi 1 +▁58 1 +▁préciser 1 +▁Mesures 1 +▁équipes 1 +▁Nom 1 +occupation 1 +▁effectuées 1 +écran 1 +bert 1 +▁acceptable 1 +▁CEE 1 +▁PME 1 +▁esp 1 +▁57 1 +▁ten 1 +▁dépasse 1 +▁Jean 1 +▁MA 1 +ombre 1 +▁ministériel 1 +-12 1 +▁district 1 +▁lecture 1 +uk 1 +▁vite 1 +▁fondamentale 1 +ange 1 +▁territoriale 1 +utilisateur 1 +▁Pi 1 +éni 1 +▁susmentionné 1 +▁crucial 1 +▁RÉ 1 +▁stable 1 +▁lié 1 +hypothèse 1 +sixième 1 +7) 1 +▁Environnement 1 +▁(613) 1 +doc 1 +▁proposées 1 +▁Examen 1 +▁Im 1 +illage 1 +exemple 1 +▁menaces 1 +▁stocks 1 +▁seuls 1 +Al 1 +▁théorie 1 +▁CI 1 +▁volontaire 1 +américain 1 +▁conformes 1 +▁quelles 1 +▁municipalité 1 +▁classé 1 +arrière 1 +▁Allemagne 1 +élève 1 +▁tabac 1 +▁Ru 1 +bel 1 +▁totalement 1 +▁résulte 1 +▁directeurs 1 +▁Rappelant 1 +▁report 1 +▁légitime 1 +ADN 1 +▁indiquent 1 +▁parallèle 1 +▁confronté 1 +▁Télé 1 +▁visent 1 +▁veuillez 1 +▁essayer 1 +▁particulières 1 +voqu 1 +▁grain 1 +tain 1 +SR 1 +▁participent 1 +▁morale 1 +étique 1 +▁voies 1 +▁programmation 1 +▁collection 1 +▁causes 1 +▁nationalité 1 +-5 1 +▁cotisation 1 +équité 1 +▁répondants 1 +▁illégale 1 +▁fini 1 +▁renouvelable 1 +occuper 1 +▁succ 1 +▁Veuillez 1 +▁rédaction 1 +▁Cap 1 +active 1 +▁ventes 1 +▁gain 1 +office 1 +chet 1 +▁finances 1 +▁officiels 1 +▁énoncés 1 +▁revue 1 +3/ 1 +▁classique 1 +▁américains 1 +▁requises 1 +AIRE 1 +▁Présidente 1 +▁Sol 1 +▁vendu 1 +Ukraine 1 +▁axé 1 +▁entière 1 +tom 1 +▁Grâce 1 +atteindre 1 +▁lettres 1 +▁consolidation 1 +▁dispositifs 1 +▁Mise 1 +▁inclure 1 +▁Lettre 1 +▁lancer 1 +ii 1 +gh 1 +OI 1 +fait 1 +▁étrangère 1 +2004 1 +▁preuves 1 +poli 1 +▁rendue 1 +avantage 1 +▁anciens 1 +▁truc 1 +▁écart 1 +▁puisqu 1 +▁gratuit 1 +▁finalement 1 +▁équivalent 1 +abandon 1 +éral 1 +▁discuter 1 +▁initial 1 +▁précisé 1 +▁four 1 +▁présentes 1 +▁ultérieure 1 +expert 1 +▁connais 1 +▁livraison 1 +▁remplacement 1 +fact 1 +▁appelée 1 +lla 1 +▁habitants 1 +OM 1 +▁française 1 +mont 1 +▁bons 1 +iciens 1 +▁my 1 +▁Sal 1 +enregistr 1 +exigence 1 +nait 1 +▁appartenant 1 +▁radiodiffusion 1 +expansion 1 +▁questionnaire 1 +▁révisé 1 +▁préserver 1 +acier 1 +▁sanitaire 1 +.8 1 +class 1 +▁59 1 +▁placement 1 +▁courriel 1 +▁53 1 +▁semblent 1 +gramme 1 +▁te 1 +essaye 1 +▁Eh 1 +PT 1 +▁ratification 1 
+mment 1 +lot 1 +▁formulées 1 +▁VI 1 +▁réparation 1 +▁répét 1 +extrême 1 +▁droite 1 +▁découvrir 1 +▁calculé 1 +▁incidence 1 +La 1 +▁Ge 1 +▁ii 1 +▁voilà 1 +▁essentielle 1 +▁combattre 1 +▁passant 1 +aménagement 1 +▁survie 1 +▁bases 1 +iller 1 +culaire 1 +ibilité 1 +ION 1 +FP 1 +▁viennent 1 +wi 1 +▁créée 1 +▁devenue 1 +▁continué 1 +▁bu 1 +▁catastrophes 1 +▁voulais 1 +▁pont 1 +ada 1 +▁reçues 1 +▁mobile 1 +flu 1 +▁mor 1 +▁dangereux 1 +▁espèce 1 +UM 1 +AUX 1 +▁révélé 1 +entend 1 +▁agir 1 +▁encouragé 1 +tien 1 +abilis 1 +▁allemand 1 +▁disposer 1 +▁chaud 1 +ampleur 1 +▁Liens 1 +septième 1 +▁députés 1 +immeuble 1 +▁négatif 1 +▁Région 1 +▁exercer 1 +▁disponibilité 1 +▁mélange 1 +[ 1 +aucune 1 +▁carburant 1 +▁discours 1 +▁47 1 +organisations 1 +lev 1 +▁seuil 1 +▁standard 1 +▁constitution 1 +▁bancaire 1 +angle 1 +▁définitive 1 +▁isolé 1 +▁montagne 1 +▁distinction 1 +▁médical 1 +batt 1 +pré 1 +▁ti 1 +▁entendre 1 +▁touche 1 +▁profiter 1 +▁progress 1 +hal 1 +▁Rec 1 +▁racisme 1 +asile 1 +pond 1 +▁Page 1 +occupe 1 +▁Nos 1 +▁Can 1 +▁garder 1 +▁préparatoire 1 +▁distribué 1 +▁inférieure 1 +tabli 1 +▁biologiques 1 +▁expériment 1 +▁privés 1 +▁productivité 1 +effort 1 +▁ref 1 +▁CD 1 +▁obligation 1 +ologiques 1 +illon 1 +▁résistance 1 +bat 1 +▁former 1 +▁bibliothèque 1 +▁exposition 1 +▁PIB 1 +▁Liste 1 +▁fondamental 1 +▁devrions 1 +▁bleu 1 +▁transparent 1 +perfectionnement 1 +EX 1 +▁avancés 1 +▁chiffre 1 +publi 1 +▁61 1 +▁thèmes 1 +bol 1 +▁US 1 +▁accessible 1 +▁entente 1 +attribution 1 +miné 1 +▁accorder 1 +▁approfondie 1 +ator 1 +▁Caraïbes 1 +▁insuffisant 1 +▁Organisation 1 +▁motif 1 +▁tests 1 +.10 1 +ITÉ 1 +▁restaurant 1 +TÉ 1 +▁jeunesse 1 +fu 1 +/4 1 +ié 1 +▁correct 1 +CEE 1 +viv 1 +▁découverte 1 +▁contrôlé 1 +▁dose 1 +▁poursuivi 1 +▁prenant 1 +▁pensé 1 +▁ronde 1 +▁emp 1 +▁Site 1 +▁mouvements 1 +▁spécialisées 1 +▁46 1 +▁ru 1 +▁considérés 1 +▁Budget 1 +vier 1 +▁ajouter 1 +▁implique 1 +................ 
1 +▁Min 1 +▁Yukon 1 +▁Bosnie 1 +élargissement 1 +2008 1 +FR 1 +gal 1 +▁offrent 1 +▁milieux 1 +▁04 1 +▁constante 1 +▁pousse 1 +▁proposer 1 +▁Justice 1 +▁respecté 1 +▁mutuelle 1 +▁déposée 1 +▁exposés 1 +▁infractions 1 +▁domicile 1 +offrir 1 +▁tonnes 1 +▁soldats 1 +▁visé 1 +▁effectués 1 +▁retenue 1 +press 1 +▁moindre 1 +ini 1 +▁capitale 1 +▁exécuté 1 +-6 1 +▁exception 1 +▁époque 1 +indice 1 +): 1 +appuyer 1 +▁témoins 1 +aéroport 1 +▁tir 1 +bour 1 +▁Chapitre 1 +▁applications 1 +▁dà 1 +▁pensons 1 +▁envisage 1 +▁teneur 1 +▁irr 1 +▁1987 1 +▁saisie 1 +▁prioritaires 1 +▁Fi 1 +▁islamique 1 +hr 1 +▁profession 1 +▁contribuent 1 +▁prétend 1 +▁assujetti 1 +▁1989 1 +Île 1 +LO 1 +▁positif 1 +È 1 +▁Prie 1 +▁Afghanistan 1 +▁appuyé 1 +▁1997, 1 +harmonisation 1 +▁vérité 1 +▁auparavant 1 +jour 1 +nage 1 +ndra 1 +▁SO 1 +ung 1 +▁abouti 1 +VE 1 +aujourd 1 +▁universelle 1 +8) 1 +qua 1 +▁visible 1 +▁espagnol 1 +ado 1 +▁Transports 1 +électro 1 +▁informer 1 +▁gagner 1 +▁Réseau 1 +▁noms 1 +vol 1 +bout 1 +▁réflexion 1 +▁entraîne 1 +▁industries 1 +▁exigé 1 +▁faudra 1 +▁soixante 1 +▁pri 1 +▁99 1 +▁judiciaires 1 +huitième 1 +▁Alberta 1 +▁négative 1 +▁intéressées 1 +ivité 1 +▁organis 1 +éco 1 +▁résultant 1 +exploit 1 +▁constate 1 +▁versement 1 +neuvième 1 +▁réservé 1 +▁latine 1 +▁régulière 1 +▁Aide 1 +▁Wi 1 +cell 1 +▁tiens 1 +▁fournissent 1 +▁administrations 1 +▁GR 1 +▁77 1 +ancien 1 +▁russe 1 +▁bassin 1 +oux 1 +▁réclamation 1 +▁privées 1 +▁compose 1 +▁réglementaires 1 +IB 1 +▁CR 1 +▁institutionnels 1 +▁habituellement 1 +▁provenance 1 +▁froid 1 +▁Cuba 1 +▁Version 1 +où 1 +text 1 +▁restent 1 +▁traditionnelles 1 +2003 1 +mination 1 +▁prévisions 1 +▁fiable 1 +▁verre 1 +▁fichier 1 +connect 1 +▁essentielles 1 +OIT 1 +onde 1 +▁variété 1 +▁estimations 1 +▁minimale 1 +AV 1 +▁indi 1 +étendre 1 +▁regroupe 1 +▁apport 1 +▁Objectif 1 +▁fondement 1 +▁médicament 1 +▁physiques 1 +▁agent 1 +▁couvre 1 +anne 1 +▁coordonner 1 +▁invités 1 +▁52 1 +▁capitaux 1 +▁définis 1 +▁marge 1 +▁rassemble 1 +▁51 1 +▁énoncées 1 +▁perçu 1 +▁constater 1 +▁chasse 1 +▁volontaires 1 +▁marine 1 +▁enjeux 1 +rég 1 +vironnementale 1 +▁fournies 1 +▁sortie 1 +GE 1 +▁David 1 +▁vice 1 +▁favorise 1 +▁abus 1 +▁récentes 1 +▁1996, 1 +architecture 1 +▁suprême 1 +▁fusion 1 +gation 1 +▁Archives 1 +▁Norvège 1 +▁compétitivité 1 +Équipe 1 +▁mettent 1 +NC 1 +▁Asie 1 +▁histoires 1 +▁néanmoins 1 +▁glace 1 +▁inscrits 1 +▁impliqué 1 +▁rêve 1 +dor 1 +▁concrètes 1 +ministre 1 +▁sexuel 1 +▁formulé 1 +écart 1 +hu 1 +autonomie 1 +▁consomm 1 +rions 1 +▁Health 1 +explication 1 +▁soutient 1 +êt 1 +▁plaisir 1 +années 1 +▁franc 1 +▁chargés 1 +accompagne 1 +▁municipal 1 +indicateur 1 +▁PDF 1 +▁migr 1 +prend 1 +aliser 1 +▁nette 1 +▁Cadre 1 +clé 1 +axe 1 +▁orientations 1 +▁déterminant 1 +▁foyer 1 +▁Assemblée 1 +▁Mac 1 +▁1988 1 +▁déploiement 1 +cip 1 +▁condamné 1 +▁quels 1 +▁maîtrise 1 +ny 1 +▁indépendants 1 +actuel 1 +▁diminué 1 +▁Trans 1 +udi 1 +▁dangereuses 1 +▁suppose 1 +▁exercé 1 +▁fournisseur 1 +▁démontré 1 +▁département 1 +▁exact 1 +▁difficiles 1 +▁permettrait 1 +▁administratifs 1 +▁compromis 1 +▁futurs 1 +▁actif 1 +▁mentionne 1 +▁secret 1 +▁douanes 1 +Les 1 +▁donn 1 +▁envisager 1 +▁psycho 1 +▁évidence 1 +lique 1 +▁logique 1 +▁bénévole 1 +▁Paul 1 +▁développés 1 +▁capables 1 +▁traduit 1 +gg 1 +kin 1 +expertise 1 +agence 1 +▁Industrie 1 +« 1 +▁rat 1 +▁Courriel 1 +▁intermédiaire 1 +▁révolution 1 +-04 1 +▁autochtone 1 +-05 1 +▁transmettre 1 +▁mesuré 1 +▁150 1 +▁bébé 1 +▁reproduction 1 +▁clinique 1 +▁accru 1 +élev 1 +▁Journal 1 +pér 1 +▁absolument 1 +▁pur 1 +1/ 1 +▁surveiller 1 +▁célébr 1 +▁joint 
1 +accroissement 1 +▁` 1 +arité 1 +▁derrière 1 +▁leadership 1 +joint 1 +▁voisins 1 +▁régissant 1 +▁transféré 1 +▁Fondation 1 +▁certainement 1 +Ar 1 +▁composant 1 +uy 1 +ALE 1 +▁intelligent 1 +▁reconstruction 1 +▁étroitement 1 +▁énoncé 1 +bul 1 +ker 1 +▁SUR 1 +▁coordonnée 1 +▁CH 1 +endroit 1 +Labrador 1 +disciplinaire 1 +▁régulier 1 +▁Soudan 1 +▁location 1 +▁visiteurs 1 +apporter 1 +▁accepter 1 +EI 1 +▁reçoivent 1 +▁solidarité 1 +▁identique 1 +▁88 1 +Environnement 1 +▁messages 1 +▁vir 1 +▁reçoit 1 +▁file 1 +▁chaleur 1 +▁Aucune 1 +▁égale 1 +▁Canadian 1 +▁importé 1 +▁800 1 +oï 1 +▁bâtiments 1 +▁dépense 1 +▁Environ 1 +▁délivré 1 +▁urgent 1 +▁Source 1 +mber 1 +▁adolescents 1 +▁couvrir 1 +▁combustible 1 +▁repos 1 +top 1 +aime 1 +▁Art 1 +attente 1 +▁consulté 1 +▁opportun 1 +▁intérimaire 1 +pin 1 +▁2004-2005 1 +▁décret 1 +▁Renseignements 1 +▁prête 1 +▁sauvage 1 +▁descend 1 +▁normalement 1 +Autriche 1 +▁effort 1 +affirm 1 +algré 1 +▁provincial 1 +▁cliniques 1 +atmosphère 1 +▁remarquable 1 +▁Évaluation 1 +homo 1 +▁criminel 1 +▁candidature 1 +asso 1 +▁opérationnelles 1 +TO 1 +▁forum 1 +▁machines 1 +▁voyages 1 +▁ethnique 1 +▁basé 1 +way 1 +▁Vienne 1 +vert 1 +▁normal 1 +lang 1 +▁uniforme 1 +iale 1 +▁employeurs 1 +ski 1 +▁Résolution 1 +▁libération 1 +▁regardez 1 +▁supérieurs 1 +▁géré 1 +▁associé 1 +/5 1 +▁industriel 1 +▁phrase 1 +▁installé 1 +labor 1 +▁Questions 1 +▁armées 1 +▁combinaison 1 +▁spécialisé 1 +▁02 1 +▁apportées 1 +▁conjointe 1 +▁indiquant 1 +▁intervention 1 +▁concevoir 1 +▁partenaire 1 +▁PAR 1 +-06 1 +▁quelconque 1 +▁lancement 1 +▁éventuelle 1 +▁statu 1 +▁sérieux 1 +hir 1 +▁proximité 1 +leur 1 +▁duquel 1 +▁belle 1 +arra 1 +NI 1 +▁probable 1 +gré 1 +important 1 +TRE 1 +▁mg 1 +5/ 1 +TER 1 +▁étranger 1 +▁pourtant 1 +▁expliqué 1 +▁difficulté 1 +agisse 1 +▁Troisième 1 +▁Jan 1 +▁concentrer 1 +▁africains 1 +▁admissible 1 +▁réglementaire 1 +▁solde 1 +EE 1 +▁blessure 1 +▁bilatéral 1 +▁mains 1 +▁tissus 1 +▁opération 1 +▁concernent 1 +▁faibles 1 +▁ter 1 +▁adopte 1 +▁mixte 1 +the 1 +gon 1 +▁îles 1 +▁ferroviaire 1 +▁Canadiennes 1 +▁tension 1 +▁bloc 1 +OD 1 +▁secours 1 +cre 1 +9) 1 +ède 1 +▁Pakistan 1 +▁rythme 1 +icule 1 +old 1 +Herzégovine 1 +▁délégué 1 +▁améliorations 1 +▁laissé 1 +mili 1 +▁institutionnel 1 +graph 1 +▁organe 1 +▁kilomètres 1 +échapp 1 +▁EX 1 +effectuer 1 +▁Pri 1 +▁formuler 1 +dans 1 +▁Ensuite 1 +▁trente 1 +▁imposé 1 +▁unité 1 +▁retrouve 1 +rise 1 +FA 1 +▁mondiaux 1 +▁graphique 1 +échec 1 +▁conjointement 1 +appliqu 1 +▁satisfaisant 1 +▁existent 1 +ART 1 +▁basée 1 +prouv 1 +▁diplomatique 1 +considérablement 1 +▁abordé 1 +habitude 1 +▁traduction 1 +aucun 1 +▁discipline 1 +▁secrétaire 1 +▁instructions 1 +écoute 1 +▁Roumanie 1 +▁analyses 1 +▁clause 1 +▁voyons 1 +ding 1 +▁artistes 1 +Ch 1 +riez 1 +ning 1 +▁parlementaires 1 +▁PC 1 +lam 1 +Québec 1 +▁complémentaires 1 +ö 1 +▁lacunes 1 +▁Parcs 1 +king 1 +▁équipé 1 +▁sollicit 1 +▁présentant 1 +ouvrage 1 +▁stress 1 +▁souhaitent 1 +▁prescriptions 1 +▁achevé 1 +▁voitures 1 +emballage 1 +▁lâ 1 +▁traditionnelle 1 +▁victime 1 +▁University 1 +▁navigation 1 +▁influ 1 +▁entraîner 1 +▁organique 1 +organismes 1 +OCDE 1 +▁forêt 1 +▁perd 1 +▁Chef 1 +▁jeux 1 +▁passion 1 +▁licences 1 +▁MIN 1 +èse 1 +▁accessibles 1 +charge 1 +▁pire 1 +▁indu 1 +▁connue 1 +ologique 1 +▁dessus 1 +infection 1 +nière 1 +▁appropriés 1 +estimation 1 +épreuve 1 +▁pensez 1 +▁complexité 1 +▁ordonnance 1 +▁historiques 1 +▁approuvée 1 +▁soin 1 +attaque 1 +▁potentielle 1 +▁Belgique 1 +▁contribuable 1 +▁issus 1 +▁mm 1 +▁logistique 1 +▁massive 1 +ità 1 +▁réfléchir 1 +▁fonctionner 1 
+▁gouverneur 1 +▁rappelé 1 +▁accueilli 1 +▁Autre 1 +▁2006-2007 1 +▁rapportant 1 +▁infraction 1 +bus 1 +▁Proposition 1 +▁milliard 1 +logique 1 +▁croissante 1 +▁bilan 1 +gué 1 +▁compagnies 1 +▁fournie 1 +▁liquide 1 +▁cuisine 1 +▁ancien 1 +▁cinquième 1 +▁suivent 1 +▁attentes 1 +acquérir 1 +▁perception 1 +mir 1 +▁révisée 1 +▁autrement 1 +▁anniversaire 1 +uri 1 +▁bactérie 1 +NO 1 +▁Robert 1 +▁modes 1 +▁âgés 1 +▁recueillir 1 +▁approuve 1 +▁allégations 1 +TP 1 +▁dûment 1 +▁Pol 1 +▁identifier 1 +▁Grèce 1 +▁avenir 1 +▁Peut 1 +entreprendre 1 +firm 1 +▁Lu 1 +▁ajoutée 1 +▁socio 1 +hôpital 1 +▁particules 1 +éclair 1 +GI 1 +▁visage 1 +▁intégrer 1 +place 1 +▁arabes 1 +OMS 1 +▁Nor 1 +ARC 1 +SG 1 +▁ateliers 1 +ima 1 +▁musée 1 +▁éventuel 1 +▁paramètres 1 +▁TED 1 +▁maintenu 1 +▁idéal 1 +▁Commerce 1 +▁équipements 1 +▁Italie 1 +▁Congrès 1 +▁Em 1 +▁personnelles 1 +▁Pologne 1 +aptitude 1 +indique 1 +▁allait 1 +▁collaborer 1 +éch 1 +▁appliquée 1 +▁hauteur 1 +Australie 1 +étend 1 +▁Vice 1 +▁pensent 1 +▁apporte 1 +▁franchi 1 +▁fondées 1 +TRA 1 +▁Siège 1 +▁établit 1 +▁industriels 1 +loi 1 +▁pensions 1 +▁fonctionnaire 1 +-8 1 +arch 1 +▁tchèque 1 +▁aspect 1 +IST 1 +▁permettront 1 +▁transactions 1 +product 1 +▁taxes 1 +▁Justification 1 +▁passagers 1 +▁potentiels 1 +▁éventail 1 +▁protéine 1 +▁transit 1 +▁connaît 1 +▁totalité 1 +▁nécessairement 1 +atelier 1 +BI 1 +major 1 +▁plate 1 +▁revenir 1 +▁Directive 1 +▁appliquées 1 +▁heure 1 +▁250 1 +▁Aucun 1 +▁600 1 +avère 1 +▁Tri 1 +▁pensée 1 +▁serre 1 +métrique 1 +▁bras 1 +▁Patrimoine 1 +▁coopérer 1 +▁entraîné 1 +▁diffusé 1 +▁litige 1 +itt 1 +▁barre 1 +ût 1 +indemnisation 1 +▁ST 1 +▁admis 1 +▁remarquer 1 +▁jouent 1 +▁regarde 1 +▁ententes 1 +▁Sans 1 +▁bulletin 1 +rice 1 +▁manifestations 1 +Franc 1 +▁renseignement 1 +▁Finlande 1 +sident 1 +lt 1 +issi 1 +▁transformer 1 +▁mobilité 1 +nit 1 +▁susceptible 1 +▁handicapés 1 +▁travaillant 1 +▁Université 1 +▁orale 1 +hum 1 +▁dimensions 1 +▁solaire 1 +▁compens 1 +▁résidents 1 +▁aidé 1 +▁cote 1 +tout 1 +République 1 +▁North 1 +▁dommage 1 +▁Beaucoup 1 +▁amples 1 +▁Cha 1 +2002 1 +▁différends 1 +entrepreneur 1 +▁naturels 1 +pend 1 +▁démographique 1 +MENT 1 +▁formulation 1 +▁combiné 1 +point 1 +étudier 1 +▁fixée 1 +▁parfait 1 +mod 1 +▁viable 1 +▁contingent 1 +▁colonne 1 +▁64 1 +ouille 1 +▁titulaires 1 +▁Protection 1 +▁expressément 1 +▁Activités 1 +▁longueur 1 +inquiétude 1 +▁détenu 1 +▁branche 1 +▁imagin 1 +▁défend 1 +▁fruits 1 +oyez 1 +▁délinquant 1 +▁module 1 +ash 1 +logie 1 +omique 1 +▁Mor 1 +▁with 1 +égo 1 +▁francophone 1 +▁oiseaux 1 +▁échantillons 1 +▁suggère 1 +▁accusé 1 +automne 1 +▁associée 1 +▁déplacées 1 +organe 1 +étendue 1 +▁fichiers 1 +pharmaceutique 1 +▁paraît 1 +comme 1 +▁engagés 1 +▁reli 1 +▁servent 1 +▁cité 1 +▁rubrique 1 +attaquer 1 +inventaire 1 +ommercialisation 1 +▁équilibre 1 +▁situe 1 +▁62 1 +▁adéquat 1 +▁fier 1 +rio 1 +ü 1 +▁disposent 1 +▁imm 1 +▁courante 1 +Ex 1 +acide 1 +▁Char 1 +Con 1 +▁compétents 1 +▁tentative 1 +▁soleil 1 +présent 1 +▁remettre 1 +mère 1 +▁scrutin 1 +▁superficie 1 +▁artistique 1 +▁débit 1 +▁technologiques 1 +Bretagne 1 +▁cohérence 1 +▁composantes 1 +Italie 1 +▁Timor 1 +▁?" 
1 +▁remédier 1 +midi 1 +▁revendication 1 +▁effectifs 1 +▁Lake 1 +▁complément 1 +ual 1 +▁Liban 1 +▁veille 1 +▁affirmé 1 +▁savent 1 +▁01 1 +▁témoigne 1 +▁Christ 1 +▁frein 1 +▁AU 1 +▁universel 1 +▁dépit 1 +▁rechange 1 +argument 1 +▁Fin 1 +▁positifs 1 +▁obstacle 1 +▁collectif 1 +▁exemplaires 1 +III 1 +▁from 1 +influence 1 +▁étendu 1 +▁fabriqué 1 +▁voulait 1 +▁tiennent 1 +▁PCT 1 +▁résident 1 +▁63 1 +▁affiche 1 +▁1980 1 +▁révèle 1 +▁océans 1 +▁opérationnelle 1 +graphe 1 +▁comprenant 1 +▁2002-2003 1 +pour 1 +▁donnant 1 +▁navire 1 +illant 1 +▁voisin 1 +▁stockage 1 +▁ethniques 1 +▁correctement 1 +▁exempt 1 +pdf 1 +érée 1 +▁universitaires 1 +2000 1 +▁offrant 1 +étape 1 +▁composante 1 +assembl 1 +▁réputé 1 +▁renforcé 1 +▁Ki 1 +▁Deuxième 1 +opéra 1 +Égypte 1 +tter 1 +▁langage 1 +▁valable 1 +time 1 +accélérer 1 +vir 1 +Votre 1 +invention 1 +▁associées 1 +▁similaire 1 +▁1985 1 +▁démontre 1 +▁entretien 1 +▁ton 1 +▁conduire 1 +▁représenter 1 +homologation 1 +▁Néanmoins 1 +▁boîte 1 +▁rétro 1 +▁externes 1 +▁peau 1 +▁fiscal 1 +▁diffuser 1 +acqu 1 +rand 1 +▁amélioré 1 +▁automobile 1 +cup 1 +fort 1 +encourager 1 +▁bénéficient 1 +▁produisent 1 +▁restreint 1 +▁billet 1 +▁coordonn 1 +▁critère 1 +▁essayé 1 +▁thématique 1 +tine 1 +▁autorité 1 +▁Outre 1 +▁coin 1 +anglais 1 +.2.1 1 +▁bénéficiaire 1 +stitution 1 +▁chronique 1 +▁amélioration 1 +▁attendu 1 +▁monnaie 1 +▁option 1 +▁exclusivement 1 +▁mut 1 +agissait 1 +▁Musée 1 +▁ratifié 1 +▁attitude 1 +▁qualification 1 +▁remarque 1 +▁Statut 1 +▁reconnue 1 +excellence 1 +▁Bruxelles 1 +▁aperçu 1 +▁scénario 1 +▁déficit 1 +▁rétablissement 1 +▁réduite 1 +▁spécialisés 1 +2001 1 +▁TPS 1 +▁° 1 +▁recensement 1 +▁by 1 +▁renouvellement 1 +▁préférence 1 +page 1 +▁violent 1 +▁0,0 1 +▁compétente 1 +▁tableaux 1 +▁désignée 1 +▁style 1 +▁Adoption 1 +▁progresser 1 +▁interprétation 1 +estime 1 +utilité 1 +arg 1 +prop 1 +▁retenu 1 +1998 1 +▁troubles 1 +▁participe 1 +▁officieuses 1 +plan 1 +▁garçons 1 +▁magasin 1 +▁concret 1 +▁tort 1 +▁efficacité 1 +observateur 1 +▁POUR 1 +vage 1 +Agriculture 1 +▁pierre 1 +çons 1 +▁intra 1 +Indonésie 1 +assemblée 1 +oeuvre 1 +articul 1 +issé 1 +▁Jeux 1 +ssons 1 +▁école 1 +▁printemps 1 +▁média 1 +▁GRC 1 +oxyde 1 +▁rappel 1 +▁CNUCED 1 +ffin 1 +▁égal 1 +▁Vol 1 +ji 1 +▁argent 1 +▁courte 1 +▁priv 1 +▁voyageurs 1 +résolution 1 +tect 1 +▁désignation 1 +Autorité 1 +▁fasse 1 +Pro 1 +▁passée 1 +▁front 1 +▁chute 1 +▁cohésion 1 +▁célèbre 1 +▁paysage 1 +▁dignité 1 +▁Ben 1 +▁suscite 1 +▁citoyen 1 +▁envoyer 1 +▁appar 1 +-07 1 +▁saine 1 +velopp 1 +▁verser 1 +▁quotidien 1 +▁répand 1 +▁définit 1 +/00 1 +▁externe 1 +▁touchés 1 +▁Portugal 1 +▁rédigé 1 +▁responsabilis 1 +ÉS 1 +▁vide 1 +▁Revenu 1 +tude 1 +▁quasi 1 +▁organisationnel 1 +▁intéressante 1 +▁Leur 1 +tech 1 +thérapie 1 +▁virtuel 1 +▁handicap 1 +LC 1 +▁vécu 1 +▁écosystèmes 1 +▁vendre 1 +tudi 1 +▁Quant 1 +dg 1 +▁réunir 1 +▁éducatif 1 +▁Accord 1 +▁richesse 1 +▁coeur 1 +▁supprimer 1 +▁plastique 1 +▁vin 1 +OTAN 1 +▁Op 1 +▁concurrentiel 1 +▁suivie 1 +AND 1 +acceptation 1 +▁Danemark 1 +incertitude 1 +▁réglé 1 +▁Tél 1 +▁étions 1 +▁suffit 1 +▁trouverez 1 +▁Washington 1 +▁suggéré 1 +▁voté 1 +▁Règle 1 +▁frère 1 +hydr 1 +▁contraintes 1 +▁(3) 1 +rence 1 +▁future 1 +infraction 1 +▁trimestre 1 +▁vouloir 1 +admissibilité 1 +exploration 1 +▁effectif 1 +rap 1 +▁accéder 1 +▁Chypre 1 +▁attribuable 1 +▁symbole 1 +▁téléphonique 1 +▁administré 1 +▁occidentale 1 +▁terminer 1 +▁agences 1 +▁plage 1 +▁Imp 1 +copie 1 +▁tend 1 +▁atelier 1 +▁limitation 1 +▁partiel 1 +▁Public 1 +▁conformer 1 +Zélande 1 +▁Corporation 1 +▁Résumé 1 +▁discriminatoire 1 
+▁sensibiliser 1 +▁visa 1 +échantillon 1 +▁Durant 1 +▁bref 1 +▁développe 1 +▁refusé 1 +▁pouvais 1 +▁dotation 1 +▁acheté 1 +ney 1 +▁sentir 1 +UC 1 +▁requise 1 +▁diagnostic 1 +▁Science 1 +organiser 1 +oubli 1 +africain 1 +IX 1 +UV 1 +▁légères 1 +▁absolu 1 +▁race 1 +▁étudiant 1 +▁compatible 1 +▁illustre 1 +▁prà 1 +▁content 1 +thèse 1 +▁mensuel 1 +▁spécialement 1 +htm 1 +Afghanistan 1 +▁détermine 1 +▁observateurs 1 +avancement 1 +▁domestique 1 +tivité 1 +▁pourrions 1 +▁cal 1 +ctor 1 +▁Fl 1 +identifier 1 +▁subsidiaire 1 +▁George 1 +▁Publications 1 +envoi 1 +▁hôte 1 +▁Résultats 1 +OSCE 1 +appelante 1 +▁strict 1 +▁organiser 1 +▁rencontrer 1 +▁Pal 1 +▁coordonnateur 1 +▁clientèle 1 +▁acheter 1 +▁Chi 1 +ball 1 +▁Pren 1 +▁chimique 1 +▁profondeur 1 +▁arrière 1 +implication 1 +▁souhaiter 1 +ographique 1 +▁Sierra 1 +û 1 +▁malade 1 +▁aliment 1 +suite 1 +▁magnifique 1 +® 1 +▁promoteur 1 +intervenant 1 +▁Consul 1 +▁Londres 1 +▁Gal 1 +▁Wal 1 +▁déposer 1 +invitation 1 +▁UN 1 +▁diplôme 1 +Argentine 1 +▁quarante 1 +▁disparu 1 +▁prenantes 1 +événement 1 +▁fixer 1 +▁DG 1 +essence 1 +▁CANADA 1 +▁cultiv 1 +▁acquise 1 +▁citer 1 +occupent 1 +guer 1 +HA 1 +▁démontrer 1 +Espagne 1 +▁fermé 1 +▁potable 1 +▁révél 1 +▁gestionnaire 1 +▁méta 1 +intégrer 1 +▁resté 1 +▁remis 1 +▁prie 1 +▁disparition 1 +▁connaissent 1 +▁douce 1 +▁Travail 1 +Israël 1 +augmenter 1 +▁décrire 1 +▁néglige 1 +▁encourageant 1 +▁Espagne 1 +▁distincte 1 +▁imprimé 1 +▁Hotel 1 +▁maternelle 1 +▁détection 1 +▁agriculteurs 1 +▁touristique 1 +▁palestinienne 1 +▁stock 1 +ÉE 1 +▁viande 1 +expiration 1 +obtention 1 +▁chap 1 +▁disque 1 +▁· 1 +Canada 1 +▁entrepreneurs 1 +ASS 1 +▁HCR 1 +/10 1 +administrateur 1 +importateur 1 +envoyer 1 +▁vendredi 1 +▁concentr 1 +dd 1 +aurait 1 +▁Rome 1 +▁croissant 1 +▁conviction 1 +interaction 1 +▁tente 1 +▁Bulgarie 1 +▁faisaient 1 +mail 1 +▁TV 1 +▁atmosphérique 1 +▁ouvrir 1 +▁certification 1 +▁plat 1 +▁Ob 1 +▁transmet 1 +▁98 1 +▁circonscription 1 +▁recouvrement 1 +entremise 1 +▁disposé 1 +▁constamment 1 +▁Gar 1 +▁administrateurs 1 +▁témoignage 1 +▁justifié 1 +▁arbres 1 +▁Sécurité 1 +▁demeurent 1 +▁répression 1 +▁vieille 1 +régional 1 +▁intensif 1 +▁recul 1 +▁prime 1 +option 1 +▁conducteur 1 +▁menacé 1 +%) 1 +▁ajoute 1 +▁provoque 1 +▁Pat 1 +▁leçon 1 +▁mineur 1 +▁tué 1 +▁Européen 1 +▁répondant 1 +▁retirer 1 +▁via 1 +▁procureur 1 +-09 1 +exercer 1 +▁leader 1 +▁Pêches 1 +oïde 1 +▁universitaire 1 +huile 1 +▁étage 1 +▁vêtements 1 +▁jardin 1 +▁Jusqu 1 +▁voter 1 +▁civilisation 1 +▁Peter 1 +▁préfér 1 +▁espérons 1 +▁éloigné 1 +étiquette 1 +▁immédiate 1 +▁orientale 1 +▁explicite 1 +▁affiché 1 +▁arrangements 1 +▁conservé 1 +incendie 1 +ception 1 +▁profond 1 +▁perdre 1 +▁destinée 1 +▁consultant 1 +glo 1 +▁saisir 1 +▁tax 1 +▁défendre 1 +▁vois 1 +ège 1 +OMM 1 +/58/ 1 +▁échantillon 1 +culture 1 +▁compléter 1 +▁corporel 1 +artiste 1 +Ambassadeur 1 +▁réfléchi 1 +▁justifier 1 +ajouter 1 +▁identité 1 +▁utilis 1 +▁Bell 1 +▁télécopie 1 +pression 1 +▁connexion 1 +▁émerge 1 +▁Ali 1 +OUR 1 +▁ram 1 +▁achats 1 +▁morceau 1 +path 1 +▁conversation 1 +▁psychologique 1 +▁Deuxièmement 1 +▁Suite 1 +▁démo 1 +inspire 1 +▁consommateur 1 +▁évolue 1 +épidémie 1 +▁Géorgie 1 +▁Finances 1 +▁évoqué 1 +▁fondation 1 +▁hôpitaux 1 +▁personnalité 1 +▁enceinte 1 +▁empêche 1 +▁Mat 1 +accorder 1 +▁sanction 1 +▁attaché 1 +TVH 1 +Rev 1 +chev 1 +▁retiré 1 +▁alloué 1 +▁polluants 1 +▁repas 1 +▁amp 1 +expédition 1 +sseur 1 +▁Croatie 1 +▁Leone 1 +▁lundi 1 +▁anglaise 1 +▁facilité 1 +▁2.1 1 +▁Chili 1 +▁Serbie 1 +▁commandant 1 +▁associ 1 +tron 1 +▁Communiqué 1 +instaurer 1 +nouveau 1 +OT 1 
+▁autorise 1 +▁Océans 1 +▁TPSGC 1 +initi 1 +▁Dès 1 +▁péri 1 +atteign 1 +▁événement 1 +▁Kenya 1 +▁signaler 1 +▁sauvegarde 1 +ä 1 +▁génie 1 +invent 1 +▁rendent 1 +ONT 1 +national 1 +▁Palestine 1 +UNI 1 +▁confié 1 +▁classement 1 +▁Procureur 1 +▁parfaitement 1 +gion 1 +▁Hongrie 1 +▁automatiquement 1 +▁dégage 1 +▁lucratif 1 +ingénieur 1 +▁tendant 1 +duire 1 +arbre 1 +▁gravité 1 +extension 1 +▁disait 1 +▁espère 1 +sectoriel 1 +tract 1 +▁Dia 1 +▁clôture 1 +▁récolte 1 +▁vend 1 +▁répondent 1 +▁(2001) 1 +▁Seul 1 +▁apparaît 1 +▁légèrement 1 +▁automatique 1 +▁mille 1 +▁Cabinet 1 +▁Malheureusement 1 +▁suiv 1 +▁noire 1 +▁vieux 1 +▁armé 1 +ok 1 +EU 1 +▁Trois 1 +▁comparable 1 +▁Lisbonne 1 +▁contiennent 1 +▁côtière 1 +ouvrir 1 +▁vague 1 +▁Research 1 +▁coopérative 1 +▁valide 1 +▁servant 1 +glement 1 +▁censé 1 +acheteur 1 +▁précédemment 1 +▁équilibré 1 +▁distingue 1 +▁3.1 1 +nor 1 +▁Bay 1 +ulf 1 +▁coupable 1 +▁2005-2006 1 +▁obligé 1 +▁déposant 1 +Alliance 1 +▁législative 1 +tag 1 +étal 1 +▁Général 1 +▁café 1 +▁investisseurs 1 +▁4.1 1 +onique 1 +▁rayon 1 +▁vulnérabilité 1 +Г 1 +▁séquence 1 +▁souffre 1 +2,5 1 +▁plafond 1 +/59/ 1 +adresser 1 +▁antérieur 1 +▁intégrante 1 +▁souveraineté 1 +▁thé 1 +▁Sub 1 +▁parvenu 1 +▁comm 1 +coup 1 +▁profondément 1 +▁métaux 1 +▁tombe 1 +ated 1 +abus 1 +▁aéronefs 1 +ordonnance 1 +▁stupéfiant 1 +▁synthèse 1 +▁Post 1 +ACDI 1 +▁Maroc 1 +▁Nunavut 1 +ssaient 1 +▁dépendance 1 +▁incluant 1 +▁éventuellement 1 +orateur 1 +▁bateau 1 +satisf 1 +▁bateaux 1 +▁catastrophe 1 +▁apparent 1 +▁souffrance 1 +▁poussé 1 +aura 1 +phi 1 +▁Fort 1 +▁péril 1 +▁carré 1 +▁salarié 1 +▁créancier 1 +▁journaux 1 +▁Sha 1 +▁hydro 1 +▁Att 1 +▁lecteur 1 +▁tolérance 1 +▁évidemment 1 +▁suspension 1 +▁faux 1 +▁significatif 1 +objection 1 +▁affecte 1 +exige 1 +▁routier 1 +▁accepte 1 +▁suppos 1 +▁beau 1 +▁exploité 1 +▁120 1 +▁Conseiller 1 +▁indirect 1 +▁Limited 1 +100 1 +▁dirige 1 +▁courrier 1 +identifi 1 +▁demeur 1 +1999 1 +▁imposées 1 +▁transformé 1 +admission 1 +▁1986 1 +▁combler 1 +▁colonie 1 +▁italien 1 +212) 1 +cription 1 +▁recens 1 +enseignant 1 +▁Rwanda 1 +échéance 1 +▁radical 1 +▁prioritaire 1 +▁Mark 1 +▁quitter 1 +▁emprunt 1 +▁gènes 1 +▁vraie 1 +ignant 1 +‰ 1 +▁minoritaire 1 +/60/ 1 +trans 1 +▁attendre 1 +▁pertinence 1 +▁concrète 1 +▁puni 1 +▁relèvent 1 +▁revient 1 +Enquête 1 +épi 1 +épargne 1 +▁exceptionnelle 1 +lib 1 +▁progressivement 1 +▁modernisation 1 +ambassade 1 +efforce 1 +▁accro 1 +▁pourriez 1 +▁diminuer 1 +▁Décide 1 +▁cumul 1 +▁requ 1 +Commission 1 +▁religieux 1 +▁fausse 1 +▁manger 1 +▁surpris 1 +▁ressource 1 +assainissement 1 +▁portable 1 +▁grossesse 1 +ATIONS 1 +liquer 1 +inclusion 1 +▁blessé 1 +▁constituée 1 +Ottawa 1 +▁iii 1 +▁Tim 1 +LES 1 +▁Australie 1 +▁commencent 1 +▁bruit 1 +▁attaques 1 +▁constat 1 +▁Contact 1 +▁assumer 1 +MIN 1 +arbitrage 1 +▁privilège 1 +▁meurt 1 +▁surmonter 1 +▁reconnaissant 1 +▁bientôt 1 +▁MPO 1 +/11 1 +▁assisté 1 +▁2.2 1 +▁basse 1 +▁divulgation 1 +char 1 +▁cellule 1 +▁dollar 1 +▁officiellement 1 +ARI 1 +▁renforcée 1 +Yougoslavie 1 +▁fiducie 1 +introduire 1 +arrivée 1 +▁poursuit 1 +▁arrivée 1 +▁criminelle 1 +▁décharge 1 +oppose 1 +▁probabilité 1 +▁décideurs 1 +▁Winnipeg 1 +▁détruit 1 +▁avancer 1 +annonce 1 +▁témoin 1 +▁territoriaux 1 +▁Côte 1 +ink 1 +▁Adresse 1 +▁Kar 1 +▁TRANS 1 +▁reproduit 1 +▁publier 1 +empêcher 1 +haus 1 +▁modifiant 1 +▁attirer 1 +Président 1 +▁confirme 1 +élargir 1 +▁Introduction 1 +▁ménages 1 +INS 1 +▁Indiens 1 +▁INC 1 +▁Membre 1 +▁analysé 1 +▁BCE 1 +▁féminine 1 +▁précoce 1 +▁fournissant 1 +▁mobilisation 1 +vironnementales 1 +icité 1 +ECT 1 
+▁mécanique 1 +▁possession 1 +▁pertinente 1 +ÉT 1 +▁Michael 1 +▁Financement 1 +▁sommaire 1 +în 1 +▁Aussi 1 +tisme 1 +▁Commentaires 1 +▁Cinquième 1 +MR 1 +verbal 1 +▁Prix 1 +▁voudra 1 +▁fenêtre 1 +sphère 1 +▁Somalie 1 +▁décrites 1 +▁plaque 1 +▁Commissariat 1 +▁aborder 1 +▁informel 1 +▁respectifs 1 +▁confidentiel 1 +appréciation 1 +▁dépistage 1 +▁revanche 1 +▁schéma 1 +▁douleur 1 +▁amené 1 +▁honor 1 +▁spécifiquement 1 +aéronef 1 +équation 1 +▁2003-2004 1 +▁English 1 +▁accueille 1 +▁viabilité 1 +▁cohérente 1 +▁feront 1 +▁vieillissement 1 +ozone 1 +ship 1 +▁dépasser 1 +▁exploiter 1 +▁confirmer 1 +▁Martin 1 +▁(« 1 +▁aquatique 1 +0,00 1 +▁théâtre 1 +μ 1 +▁courage 1 +tendent 1 +Azerbaïdjan 1 +▁bilatéraux 1 +▁963- 1 +▁densité 1 +/57/ 1 +▁touché 1 +agé 1 +1997 1 +Ê 1 +▁Luxembourg 1 +▁déficience 1 +▁escompté 1 +▁inclut 1 +▁analogue 1 +▁communique 1 +▁repr 1 +▁entrave 1 +▁portefeuille 1 +COM 1 +▁couple 1 +▁intervenir 1 +▁faune 1 +ANCE 1 +▁améliorée 1 +▁Partenariat 1 +▁talent 1 +▁considér 1 +▁englobe 1 +allégation 1 +▁possèdent 1 +▁tissu 1 +▁dérivé 1 +▁consenti 1 +▁souris 1 +▁Retour 1 +▁Télécopieur 1 + 1 +▁comptabilité 1 +▁hu 1 +▁simultané 1 +▁jurisprudence 1 +▁ressortissants 1 +▁métier 1 +KO 1 +Pierre 1 +▁ouvre 1 +▁citoyenneté 1 +▁iraquien 1 +▁propice 1 +phone 1 +change 1 +▁Commun 1 +nçant 1 +importantes 1 +▁Application 1 +▁transnationale 1 +horaire 1 +insuffisance 1 +▁supprimé 1 +▁Coût 1 +▁étoiles 1 +▁TIC 1 +▁pourrez 1 +▁tenter 1 +intimé 1 +▁fraude 1 +▁tranche 1 +▁Régime 1 +▁spectre 1 +▁paye 1 +± 1 +▁Fonction 1 +▁revoir 1 +autoriser 1 +▁renforce 1 +▁mathématique 1 +étant 1 +ISS 1 +▁souvenir 1 +atch 1 +acé 1 +faire 1 +mou 1 +▁# 1 +Arctique 1 +▁extrait 1 +▁700 1 +▁Quoi 1 +▁comparé 1 +▁converti 1 +▁présumé 1 +/2001/ 1 +utilise 1 +▁particip 1 +▁soumet 1 +▁croit 1 +▁différ 1 +▁restructuration 1 +▁camion 1 +▁libellé 1 +▁spécifié 1 +▁vacances 1 +▁prévoient 1 +ERS 1 +éliminer 1 +▁opposé 1 +▁charbon 1 +▁déclin 1 +incapacité 1 +▁recruté 1 +AGE 1 +▁Premièrement 1 +▁refuser 1 +méthyl 1 +▁James 1 +▁restant 1 +▁retrouver 1 +▁march 1 +▁forestier 1 +▁déclarer 1 +DH 1 +/61/ 1 +▁grosse 1 +ford 1 +— 1 +ENCE 1 +▁Sommaire 1 +intensité 1 +▁Secteur 1 +agression 1 +lio 1 +ncée 1 +▁calculer 1 +▁délivrance 1 +inverse 1 +▁Bern 1 +▁bain 1 +appuyant 1 +▁vérifié 1 +▁brûl 1 +▁délit 1 +mettez 1 +installe 1 +Année 1 +▁prospérité 1 +▁déten 1 +▁bâti 1 +▁Koweït 1 +▁compliqué 1 +▁mentionnées 1 +▁passeport 1 +▁gratuitement 1 +▁varié 1 +▁marginal 1 +agne 1 +.1.1 1 +troisième 1 +▁diplômé 1 +▁zéro 1 +▁Territoires 1 +▁féliciter 1 +▁menu 1 +▁libéralisation 1 +▁légal 1 +(3) 1 +▁Hol 1 +▁OK 1 +▁Consultations 1 +▁guéri 1 +▁signer 1 +▁correspondance 1 +rifi 1 +région 1 +WG 1 +deuxième 1 +▁recourir 1 +▁photographie 1 +/12 1 +▁World 1 +▁félicité 1 +▁marcher 1 +▁administr 1 +source 1 +▁correction 1 +fusion 1 +▁retourner 1 +▁Calgary 1 +▁cérémonie 1 +▁originaire 1 +interprète 1 +▁indien 1 +▁ressenti 1 +ELLE 1 +▁insiste 1 +▁visiter 1 +▁correspondent 1 +.3.1 1 +▁météorologique 1 +Homme 1 +▁définitif 1 +▁préservation 1 +Ordre 1 +▁observe 1 +▁analytique 1 +▁royale 1 +▁descriptif 1 +▁député 1 +Commissariat 1 +▁manipul 1 +▁forestière 1 +TES 1 +▁voient 1 +QUE 1 +§ 1 +▁comportant 1 +▁stipule 1 +bal 1 +▁japonais 1 +▁prolongé 1 +▁réconciliation 1 +▁levée 1 +bré 1 +▁directrice 1 +▁fiche 1 +▁spectacle 1 +▁Journée 1 +▁biotechnologie 1 +▁préféré 1 +▁afférent 1 +atténuation 1 +▁réagir 1 +Qu 1 +graphie 1 +▁chien 1 +▁vaut 1 +▁compétition 1 +▁inconnu 1 +inflation 1 +alerte 1 +▁imparti 1 +▁sauver 1 +archi 1 +▁exemplaire 1 +envergure 1 +▁Quelques 1 +▁effectue 1 +POS 1 
+▁paquet 1 +▁(1999) 1 +▁réussir 1 +▁orateurs 1 +▁analyser 1 +▁posent 1 +▁établissant 1 +investir 1 +▁vital 1 +▁Prend 1 +ignore 1 +▁Slovénie 1 +▁fardeau 1 +/2004/ 1 +▁exiger 1 +bio 1 +▁validité 1 +bec 1 +▁familial 1 +▁approfondi 1 +{ 1 +▁réaffirme 1 +▁versées 1 +▁enseignements 1 +▁écrire 1 +▁rive 1 +▁librement 1 +institutionnelle 1 +Com 1 +▁Google 1 +▁inutile 1 +UD 1 +▁concurrent 1 +▁tap 1 +▁cardiaque 1 +▁sensiblement 1 +artisan 1 +dium 1 +▁marketing 1 +▁mobiliser 1 +/01 1 +▁assorti 1 +▁circuit 1 +▁contemporain 1 +istique 1 +▁Mesdames 1 +▁décor 1 +▁SCT 1 +Laurent 1 +▁CRTC 1 +▁serveur 1 +▁supervision 1 +entraînement 1 +▁kilo 1 +incitation 1 +Comité 1 +▁consacrer 1 +chloro 1 +/62/ 1 +intervalle 1 +▁conclut 1 +▁bienfaisance 1 +ASFC 1 +▁femelle 1 +▁pot 1 +autant 1 +▁sucre 1 +▁Coopération 1 +quatrième 1 +INE 1 +~ 1 +/2000/ 1 +▁arguments 1 +▁roche 1 +▁immédiat 1 +▁raconter 1 +▁complété 1 +Algérie 1 +▁prononcée 1 +accomplir 1 +▁baie 1 +▁sélectionné 1 +▁dérogation 1 +▁Type 1 +étudiant 1 +ING 1 +▁artificiel 1 +Edmonton 1 +▁molécule 1 +▁reprendre 1 +} 1 +▁finir 1 +▁provoqué 1 +▁facture 1 +▁abandonné 1 +accusation 1 +atteinte 1 +▁mâle 1 +▁contraignant 1 +▁exportateurs 1 +▁recueil 1 +accident 1 +▁feux 1 +▁1984 1 +▁internet 1 +▁prononcer 1 +▁interprété 1 +Ayant 1 +pel 1 +▁touchées 1 +lip 1 +▁Messieurs 1 +▁procède 1 +▁réservoir 1 +▁tombé 1 +▁clef 1 +oxygène 1 +▁formelle 1 +▁multilatéraux 1 +▁Étude 1 +informer 1 +▁concentré 1 +inspecteur 1 +▁appartient 1 +▁symptômes 1 +équipage 1 +▁décisionnel 1 +▁procurer 1 +annulation 1 +▁bénéficie 1 +▁dotée 1 +▁compl 1 +▁minière 1 +▁Development 1 +Ouganda 1 +▁immigrants 1 +bourg 1 +▁métro 1 +▁pertinent 1 +▁William 1 +▁sécuritaire 1 +▁vérificateur 1 +▁wh 1 +▁harmonisé 1 +explorer 1 +pass 1 +▁VII 1 +▁vendeur 1 +▁prononcé 1 +▁stimuler 1 +utili 1 +▁salue 1 +▁électeurs 1 +.2.2 1 +▁Han 1 +▁commandement 1 +▁Durée 1 +rup 1 +cinquième 1 +▁Tour 1 +erreur 1 +▁véritablement 1 +embauche 1 +emprisonnement 1 +▁blanchiment 1 +métrie 1 +▁Richard 1 +étiquetage 1 +▁réinsertion 1 +apprendre 1 +▁fabriquer 1 +▁Company 1 +▁prouver 1 +Irak 1 +▁Corr 1 +▁discuté 1 +▁tire 1 +Éthiopie 1 +▁bilingue 1 +▁caméra 1 +▁Titre 1 +gov 1 +▁douane 1 +▁soupçon 1 +▁gagné 1 +▁arbitraire 1 +▁atlantique 1 +▁dégradation 1 +wood 1 +▁combattants 1 +▁heurt 1 +http 1 +▁restriction 1 +▁suffi 1 +▁prolonge 1 +▁MDN 1 +▁cercle 1 +▁Ville 1 +▁Gaza 1 +économique 1 +▁saurait 1 +accessibilité 1 +▁distribuer 1 +▁soulève 1 +attendre 1 +▁souple 1 +▁fermement 1 +▁Bibliothèque 1 +groupe 1 +▁calme 1 +apparition 1 +▁River 1 +▁cellulaire 1 +▁interrogé 1 +▁étudie 1 +▁habilité 1 +▁fur 1 +▁nulle 1 +▁souterrain 1 +outils 1 +attentat 1 +École 1 +▁protège 1 +▁signaux 1 +phosph 1 +agrément 1 +▁exceptionnel 1 +▁Pourtant 1 +UNESCO 1 +▁personnage 1 +▁quitté 1 +▁prendra 1 +bac 1 +Édouard 1 +▁avéré 1 +▁heureuse 1 +▁résume 1 +▁clos 1 +▁remonte 1 +▁proviennent 1 +officier 1 +▁Halifax 1 +▁traduire 1 +▁saumon 1 +▁condamnation 1 +▁piste 1 +▁consistant 1 +▁méthodologie 1 +inquiète 1 +▁canada 1 +▁réexamen 1 +▁furent 1 +/2006/ 1 +/2005/ 1 +achèvement 1 +▁alternative 1 +▁joindre 1 +▁textile 1 +▁indiennes 1 +/2003/ 1 +▁pêcheurs 1 +▁constituant 1 +▁créativité 1 +▁Réserve 1 +▁traditionnel 1 +▁Arrêt 1 +expulsion 1 +XV 1 +vide 1 +pyr 1 +▁regret 1 +▁câble 1 +▁souches 1 +▁toucher 1 +▁neige 1 +EST 1 +▁concession 1 +▁socioéconomique 1 +▁blanche 1 +▁souhaitable 1 +▁adressé 1 +▁détenteur 1 +ONG 1 +▁peinture 1 +immunité 1 +▁prévision 1 +▁Burundi 1 +▁Haïti 1 +▁Guerre 1 +▁inspiré 1 +▁faim 1 +Opération 1 +▁informelle 1 +▁comparativement 1 +▁récepteur 1 +immobilisation 1 
+▁séparation 1 +▁Manuel 1 +eck 1 +▁chirurgie 1 +▁rémunéré 1 +▁structuré 1 +▁ciel 1 +Angleterre 1 +▁Lituanie 1 +▁[...] 1 +▁renferme 1 +invite 1 +▁apparu 1 +essentiel 1 +▁canal 1 +▁fréquemment 1 +▁suppression 1 +▁Aff 1 +▁nutrition 1 +▁faiblesse 1 +▁enseigne 1 +▁créant 1 +▁sauvetage 1 +▁Pérou 1 +▁réalise 1 +▁commentaire 1 +accise 1 +▁malheureusement 1 +inscriv 1 +existait 1 +war 1 +0.00 1 +▁exigent 1 +Annexe 1 +▁rétablir 1 +▁dumping 1 +Ivoire 1 +▁protégées 1 +ILL 1 +▁évolué 1 +▁diriger 1 +▁âgé 1 +▁discut 1 +▁Valeur 1 +< 1 +apprécie 1 +uck 1 +▁demandant 1 +▁subir 1 +▁beauté 1 +hol 1 +formation 1 +valent 1 +▁trace 1 +▁fonctionnent 1 +▁moral 1 +▁sonore 1 +▁dépassé 1 +territorial 1 +▁nettement 1 +intolérance 1 +▁déplacer 1 +▁chanson 1 +▁segment 1 +évolu 1 +▁garantit 1 +▁compétent 1 +clin 1 +▁complémentaire 1 +énoncé 1 +instauration 1 +▁Imaginez 1 +▁délibérations 1 +▁Chacun 1 +▁reporter 1 +PORT 1 +▁Tre 1 +cru 1 +▁collègue 1 +▁Philippines 1 +aviation 1 +▁Costa 1 +▁fonctionnelle 1 +▁typique 1 +▁2008-2009 1 +▁confidentialité 1 +▁fermeture 1 +interface 1 +▁bénéficié 1 +▁Celui 1 +▁vallée 1 +▁(2004) 1 +▁imaginer 1 +EPA 1 +▁fête 1 +▁Nigéria 1 +▁motivation 1 +▁prévoyant 1 +▁souten 1 +entendre 1 +▁Slovaquie 1 +▁condamn 1 +▁Registre 1 +▁transitoire 1 +ORD 1 +▁réside 1 +issons 1 +gri 1 +▁expose 1 +crit 1 +entrevue 1 +▁Macédoine 1 +▁humide 1 +▁Lacs 1 +▁magazine 1 +‘ 1 +▁touchent 1 +▁corriger 1 +▁Sénat 1 +▁bouge 1 +▁Darfour 1 +▁rayonnement 1 +clenche 1 +▁constant 1 +Sud 1 +▁pilier 1 +▁CFP 1 +▁boule 1 +▁circul 1 +▁minute 1 +▁assurant 1 +ë 1 +exemption 1 +imposent 1 +effectif 1 +▁distributeur 1 +▁offices 1 +améliore 1 +asi 1 +▁Compagnie 1 +▁olympique 1 +▁végétal 1 +▁cesser 1 +Article 1 +▁Analyse 1 +▁Contribution 1 +▁problématique 1 +▁troupes 1 +▁recommand 1 +▁cabinet 1 +▁croyance 1 +▁tourner 1 +▁fonctionnel 1 +▁Considérant 1 +▁renvoyé 1 +▁compensation 1 +▁Park 1 +▁agréé 1 +▁__________ 1 +▁Kyoto 1 +▁défavoris 1 +▁favorisant 1 +▁blé 1 +▁éclairé 1 +▁tube 1 +▁Méd 1 +▁convenable 1 +▁doigt 1 +▁routière 1 +▁résidant 1 +▁notable 1 +▁intérim 1 +/55/ 1 +▁afghan 1 +arbitre 1 +harmonie 1 +▁signale 1 +▁puits 1 +ôme 1 +trice 1 +▁réputation 1 +▁fallu 1 +▁fraction 1 +Estonie 1 +▁grec 1 +intelligence 1 +appartenance 1 +▁réaffirmé 1 +▁consacre 1 +▁muni 1 +habitation 1 +work 1 +▁1970 1 +informatique 1 +▁Réponse 1 +▁émanant 1 +▁énonce 1 +▁capture 1 +▁Très 1 +▁redevance 1 +▁assume 1 +▁tomber 1 +▁remarqué 1 +▁motivé 1 +▁Relations 1 +▁attendant 1 +▁Processus 1 +▁impôts 1 +▁excessive 1 +▁Formation 1 +▁courbe 1 +▁1982 1 +aurais 1 +▁démonstration 1 +▁incorporé 1 +▁travailleur 1 +droit 1 +▁jeudi 1 +▁consolider 1 +▁Marc 1 +▁Group 1 +▁haine 1 +▁insulaires 1 +▁signification 1 +Hôtel 1 +▁médiation 1 +▁jaune 1 +▁Question 1 +▁objective 1 +▁libéré 1 +entraîne 1 +▁observ 1 +excédent 1 +▁Victoria 1 +▁précède 1 +/2002/ 1 +▁gars 1 +imagine 1 +▁Américains 1 +▁rentable 1 +▁producteur 1 +▁transférer 1 +▁collège 1 +▁constatations 1 +▁Lettonie 1 +=" 1 +_______ 1 +▁progressive 1 +azote 1 +but 1 +type 1 +invalidité 1 +▁goût 1 +Arménie 1 +▁golf 1 +Rouge 1 +▁Peu 1 +morph 1 +▁doter 1 +Monténégro 1 +▁transfrontière 1 +▁étroit 1 +▁mange 1 +▁reconnaiss 1 +▁ratio 1 +audiovisuel 1 +▁incapable 1 +▁sincère 1 +employer 1 +envisager 1 +Direct 1 +accueillir 1 +▁interactions 1 +▁confort 1 +▁apparaître 1 +▁collabore 1 +▁Report 1 +▁Définition 1 +▁harcèlement 1 +▁munitions 1 +efforcer 1 +adolescent 1 +▁Bangladesh 1 +▁ralenti 1 +▁génial 1 +inclure 1 +▁officiers 1 +ressource 1 +/56/ 1 +▁étrange 1 +▁déduction 1 +▁cigarette 1 +▁Encourage 1 +▁considérant 1 +TRANS 1 +▁dirigeant 
1 +hibit 1 +info 1 +▁Thaïlande 1 +▁fragment 1 +▁partiellement 1 +▁gagne 1 +exactitude 1 +▁fassent 1 +▁indésirable 1 +▁négocier 1 +▁sévère 1 +▁hasard 1 +▁sérieusement 1 +▁occidental 1 +▁Supplément 1 +▁appartement 1 +ough 1 +▁arbitral 1 +▁trompe 1 +▁arriva 1 +lette 1 +CHE 1 +▁précieux 1 +▁diffère 1 +▁réadaptation 1 +▁précieuse 1 +Colombie 1 +mond 1 +emporte 1 +mic 1 +▁Travaux 1 +▁commémor 1 +▁masculin 1 +▁pensais 1 +▁biologie 1 +▁vivement 1 +▁2000-2001 1 +▁émet 1 +insolvabilité 1 +▁documentaire 1 +▁tourne 1 +plus 1 +▁douanière 1 +▁récompense 1 +▁souplesse 1 +▁Transport 1 +▁transaction 1 +▁Louis 1 +▁libéral 1 +arrestation 1 +▁durabilité 1 +▁plomb 1 +▁sanguin 1 +Industrie 1 +▁pluie 1 +▁Promotion 1 +▁terrible 1 +▁infecté 1 +▁frappe 1 +▁dixième 1 +▁prépare 1 +/63/ 1 +▁Directrice 1 +▁pénétr 1 +▁dépression 1 +ó 1 +▁Palestiniens 1 +▁minorité 1 +1995 1 +▁justifie 1 +▁Bulletin 1 +▁restauration 1 +ward 1 +▁cessation 1 +▁Madrid 1 +▁antiterroriste 1 +▁hommage 1 +▁contenir 1 +▁grief 1 +▁Contactez 1 +hospital 1 +▁biodiversité 1 +▁retenir 1 +anthrop 1 +▁réaliste 1 +échantillonnage 1 +▁Off 1 +▁PMA 1 +▁suppl 1 +▁divisé 1 +▁PNUE 1 +▁prisonniers 1 +▁prenez 1 +▁privilégié 1 +exploitant 1 +▁matérielle 1 +église 1 +▁déplacé 1 +▁mardi 1 +▁Singapour 1 +/2008/ 1 +▁sorti 1 +affirmation 1 +▁paludisme 1 +▁recyclage 1 +▁amener 1 +hydro 1 +▁souhaité 1 +appendice 1 +▁Paiements 1 +▁annulé 1 +▁maître 1 +▁Radio 1 +▁remet 1 +▁coefficient 1 +▁rigoureuse 1 +lève 1 +accéder 1 +script 1 +▁substantielle 1 +▁utilisons 1 +▁0,1 1 +▁Society 1 +▁(2000) 1 +▁State 1 +uj 1 +▁mettra 1 +▁confusion 1 +▁Hong 1 +▁projection 1 +▁vivons 1 +▁viens 1 +▁caisse 1 +▁dispers 1 +▁spécialiste 1 +inscrit 1 +▁Crédit 1 +valuation 1 +élevage 1 +écosystème 1 +▁1,5 1 +▁pluriannuel 1 +▁prélèvement 1 +▁tenté 1 +▁léger 1 +▁préparatifs 1 +▁promesse 1 +▁roulant 1 +▁nuage 1 +benz 1 +▁suisse 1 +▁Libéria 1 +▁ultra 1 +adore 1 +▁roman 1 +abilit 1 +▁primordial 1 +▁sécheresse 1 +▁concertée 1 +▁électoral 1 +▁Rica 1 +▁fiscaux 1 +▁imprimable 1 +▁théorique 1 +▁invoqué 1 +▁assister 1 +2010 1 +hygiène 1 +▁fauteuil 1 +▁conduis 1 +▁that 1 +▁subséquent 1 +▁Principes 1 +▁jambe 1 +iya 1 +▁allié 1 +▁ressembl 1 +auraient 1 +ogène 1 +harmoniser 1 +▁compassion 1 +▁porc 1 +▁diabète 1 +▁imprimer 1 +Arabie 1 +▁guère 1 +▁croient 1 +éclairage 1 +▁filtre 1 +▁bonheur 1 +▁mercure 1 +▁sportif 1 +▁vive 1 +▁suspendu 1 +▁Figure 1 +▁CRDI 1 +▁inégalités 1 +▁prostitution 1 +▁débiteur 1 +▁correctionnel 1 +▁manifestation 1 +▁Mouvement 1 +▁retombées 1 +▁certifié 1 +▁détecter 1 +▁Médiateur 1 +défini 1 +▁parlons 1 +▁noyau 1 +▁prévisible 1 +▁débouché 1 +enveloppe 1 +version 1 +▁subsistance 1 +▁Island 1 +efficience 1 +▁pénitentiaire 1 +▁Malaisie 1 +ABLE 1 +hydrogène 1 +▁voulaient 1 +▁suspect 1 +▁program 1 +IER 1 +▁Canadien 1 +actualité 1 +▁FNUAP 1 +▁déclarant 1 +FORM 1 +▁découvr 1 +▁honnête 1 +▁inacceptable 1 +▁reconnaissent 1 +▁Lieu 1 +▁mercredi 1 +▁stabilisation 1 +▁justification 1 +Albanie 1 +▁mourir 1 +▁demandons 1 +▁déployé 1 +▁frappé 1 +▁maximal 1 +▁repris 1 +▁brochure 1 +AIEA 1 +▁précipit 1 +▁Royal 1 +stein 1 +▁convaincre 1 +▁disparaît 1 +▁négocié 1 +▁pesticides 1 +▁déroulé 1 +▁enrichi 1 +IÈRE 1 +▁Tarif 1 +extr 1 +▁Présentation 1 +▁configuration 1 +▁littoral 1 +▁contrepartie 1 +▁agréable 1 +▁filiale 1 +session 1 +▁Carte 1 +▁nommer 1 +algorithme 1 +innocuité 1 +▁Bélarus 1 +▁Technologie 1 +▁bénéfique 1 +▁crédibilité 1 +▁Smith 1 +▁payable 1 +▁laissez 1 +hop 1 +iversification 1 +▁chauffage 1 +▁référendum 1 +▁préventive 1 +▁trouvant 1 +troph 1 +impunité 1 +▁pédagogique 1 +▁retrouvé 1 +▁réservation 1 +▁URL 
1 +▁progression 1 +enlèvement 1 +well 1 +▁Produits 1 +▁quiconque 1 +▁flotte 1 +Armée 1 +▁normalisation 1 +pac 1 +− 1 +âme 1 +▁chevauch 1 +▁différemment 1 +▁permanence 1 +▁bataille 1 +▁Water 1 +▁ratifier 1 +▁Sept 1 +▁pareil 1 +▁yougoslave 1 +▁fixation 1 +▁atténuer 1 +▁usées 1 +▁visuel 1 +essor 1 +▁Michel 1 +▁Tru 1 +▁grippe 1 +▁scolarité 1 +▁xénophobie 1 +▁métal 1 +▁Lanka 1 +▁oublier 1 +▁sensibilité 1 +▁planifier 1 +Organisme 1 +▁moléculaire 1 +▁Où 1 +▁club 1 +▁1981 1 +▁normalisé 1 +employ 1 +▁mutation 1 +▁modifie 1 +▁infantile 1 +▁récapitul 1 +▁souscrit 1 +UNE 1 +▁Participation 1 +▁bouche 1 +▁System 1 +▁Venezuela 1 +▁Chris 1 +▁faculté 1 +▁technicien 1 +▁livré 1 +/2007/ 1 +▁perfection 1 +▁paie 1 +▁confortable 1 +hébergement 1 +▁génome 1 +▁pomme 1 +▁conversion 1 +▁exclusive 1 +▁éliminé 1 +▁lendemain 1 +▁retourné 1 +Musique 1 +octobre 1 +▁fruit 1 +▁impliquant 1 +▁Séance 1 +oreille 1 +▁Garde 1 +▁flexibilité 1 +▁néerlandais 1 +▁imposable 1 +▁délicat 1 +▁Élection 1 +▁Invite 1 +▁loisirs 1 +pêche 1 +▁termine 1 +▁Tunisie 1 +color 1 +01-00 1 +EAU 1 +▁triste 1 +▁pénurie 1 +▁archives 1 +▁Appel 1 +▁réfugié 1 +accomplissement 1 +Nouvelle 1 +▁Référence 1 +▁confirmation 1 +▁millénaire 1 +▁répertoire 1 +▁Wood 1 +▁visuelle 1 +▁douze 1 +▁Actuellement 1 +▁Sénégal 1 +▁littéralement 1 +▁accéléré 1 +▁Ghana 1 +▁substitut 1 +▁gère 1 +méditerranéen 1 +▁constructeur 1 +▁précité 1 +▁infirmières 1 +addition 1 +▁évalue 1 +IDE 1 +utodétermination 1 +▁inévitable 1 +▁Contrôle 1 +▁inhérent 1 +▁oriental 1 +▁immobiliers 1 +▁fiabilité 1 +▁Guatemala 1 +▁corrélation 1 +▁maternité 1 +▁récupération 1 +ón 1 +hélicoptère 1 +▁commença 1 +▁SERVICES 1 +▁catalogue 1 +▁asiatique 1 +Éducation 1 +▁instituts 1 +▁ravi 1 +▁Malte 1 +▁actualisé 1 +▁libanais 1 +▁semblait 1 +▁DANS 1 +▁incompatible 1 +▁indépendamment 1 +▁Africa 1 +▁impératif 1 +▁Situation 1 +▁(2005) 1 +CONF 1 +burg 1 +▁généré 1 +Habitat 1 +▁élabore 1 +▁Beijing 1 +▁Combien 1 +▁silence 1 +▁Mots 1 +▁Doha 1 +▁créateur 1 +▁portion 1 +▁suscité 1 +▁Jordanie 1 +▁obtient 1 +▁écho 1 +▁discrétion 1 +▁prenons 1 +Entente 1 +▁Organ 1 +▁roue 1 +▁Index 1 +▁sérieuse 1 +▁vraisemblable 1 +unanimité 1 +▁aînés 1 +▁abordable 1 +▁Jérusalem 1 +▁génocide 1 +ONUDI 1 +▁Global 1 +▁Moldova 1 +▁foncier 1 +▁vétérinaire 1 +▁syrienne 1 +▁publie 1 +section 1 +▁Vérification 1 +▁2001-2002 1 +architecte 1 +▁Quatre 1 +▁rationnelle 1 +▁Puisque 1 +extradition 1 +Quatrième 1 +▁tuberculose 1 +▁cyber 1 +▁présidé 1 +▁prélevé 1 +discrimination 1 +▁prudent 1 +▁déroulement 1 +▁cinéma 1 +▁(2002) 1 +alliance 1 +▁modeste 1 +▁0,5 1 +▁ANNEXE 1 +▁Tanzanie 1 +▁synergie 1 +▁opérateurs 1 +▁conviendrait 1 +▁Super 1 +▁prière 1 +▁Cinquante 1 +▁Réaffirmant 1 +▁formidable 1 +▁réchauffement 1 +▁tabagisme 1 +Organe 1 +▁amène 1 +▁strictement 1 +▁légumes 1 +▁respiratoire 1 +▁vélo 1 +▁fuite 1 +▁écouter 1 +____ 1 +▁légère 1 +▁accessoires 1 +▁exerçant 1 +▁manifesté 1 +▁rejoindre 1 +▁constructive 1 +▁découlent 1 +▁desquelles 1 +▁construis 1 +▁aidant 1 +▁envoie 1 +▁conviennent 1 +▁Photo 1 +▁Aperçu 1 +autoroute 1 +▁bienvenue 1 +▁613- 1 +▁boissons 1 +extraction 1 +iii 1 +ingénierie 1 +▁Données 1 +▁soudain 1 +▁FAO 1 +▁exclusif 1 +▁South 1 +▁propagation 1 +▁surprenant 1 +▁sympa 1 +TURE 1 +▁Council 1 +héritage 1 +▁structurels 1 +▁Livre 1 +extrémité 1 +▁Excellence 1 +▁Laissez 1 +▁sœur 1 +▁aéroports 1 +communication 1 +▁pollu 1 +▁contamination 1 +▁anticipé 1 +▁inciter 1 +Angola 1 +service 1 +▁remplace 1 +▁accumulé 1 +▁suédois 1 +édifice 1 +humain 1 +▁indirectement 1 +atténu 1 +▁Californie 1 +▁Fabrication 1 +▁Gouverneur 1 +▁Népal 1 +▁attiré 1 +habilitation 
1 +▁exécutive 1 +▁Bank 1 +▁patron 1 +consult 1 +▁apprécié 1 +▁mémorandum 1 +▁nettoyage 1 +▁athlètes 1 +▁tumeur 1 +▁armements 1 +▁sépare 1 +▁souverain 1 +▁United 1 +▁docteur 1 +▁confère 1 +Commissaire 1 +▁passif 1 +▁Thomas 1 +▁School 1 +allégement 1 +▁Haye 1 +▁gouvern 1 +▁supplément 1 +Uruguay 1 +▁conciliation 1 +▁Contexte 1 +▁exhort 1 +▁immense 1 +▁(2003) 1 +▁acides 1 +........... 1 +▁stipul 1 +▁Maurice 1 +▁Ajout 1 +▁préfère 1 +▁constituait 1 +▁toxicomanie 1 +▁faillite 1 +▁simulation 1 +▁entrevues 1 +accumulation 1 +▁Accès 1 +▁Gazette 1 +▁Regardez 1 +▁exhaustive 1 +▁ordonné 1 +projet 1 +▁renouvelé 1 +▁provient 1 +annuler 1 +Équateur 1 +▁bizarre 1 +▁saoudite 1 +▁Permettez 1 +▁(2006) 1 +▁connecté 1 +aventure 1 +▁constructif 1 +▁éducative 1 +▁fleuve 1 +▁Human 1 +▁Charles 1 +équivalent 1 +animaux 1 +▁Myanmar 1 +▁innovant 1 +entretenir 1 +▁réagi 1 +▁sectorielle 1 +▁minimal 1 +▁matériau 1 +▁adapt 1 +▁énumérés 1 +▁atomique 1 +▁brillant 1 +▁prouvé 1 +▁fabrique 1 +▁XXIe 1 +▁portuaire 1 +▁Établissement 1 +système 1 +occurrence 1 +▁suicide 1 +▁bouteille 1 +▁Street 1 +▁féminin 1 +▁transforme 1 +▁privilégi 1 +▁divergence 1 +▁personnalisé 1 +▁représentatif 1 +▁croyons 1 +▁Ibid 1 +▁Culture 1 +▁enlevé 1 +▁Niveau 1 +▁Nairobi 1 +▁destinataire 1 +▁refléter 1 +▁gare 1 +▁Steve 1 +▁thermique 1 +▁combine 1 +entraide 1 +ñ 1 +▁délibéré 1 +▁Parfois 1 +▁Fond 1 +▁survenus 1 +esclavage 1 +▁merveilleux 1 +▁clandestin 1 +▁bombe 1 +▁cerner 1 +▁caché 1 +▁DEMANDE 1 +▁Frank 1 +▁détruire 1 +▁prévalence 1 +▁générer 1 +▁mutuel 1 +▁Management 1 +▁envahi 1 +document 1 +▁subordonn 1 +avril 1 +▁attaqué 1 +▁convergence 1 +▁explosifs 1 +▁PRÉ 1 +▁accept 1 +affecter 1 +▁Concernant 1 +avertissement 1 +▁desquels 1 +▁certes 1 +▁émotionnel 1 +▁Divers 1 +▁College 1 +▁spirituel 1 +▁diamètre 1 +éprouve 1 +Ø 1 +▁Catégorie 1 +▁batterie 1 +▁muscle 1 +▁barème 1 +▁résidentiel 1 +intro 1 +▁0,2 1 +▁drapeau 1 +▁contractuelle 1 +▁requiert 1 +accréditation 1 +▁Salvador 1 +▁générique 1 +▁panneaux 1 +▁Texte 1 +▁intense 1 +entreposage 1 +▁récit 1 +▁garçon 1 +▁crédible 1 +▁bagage 1 +▁lentement 1 +▁terroriste 1 +▁arme 1 +▁Kazakhstan 1 +▁déployer 1 +▁résidus 1 +▁simplifier 1 +▁inhumains 1 +▁qualifi 1 +▁regrette 1 +▁différend 1 +▁efficient 1 +▁chrétien 1 +APECA 1 +▁infections 1 +▁médiateur 1 +▁circulaire 1 +▁réviser 1 +▁connexe 1 +▁Columbia 1 +▁excessif 1 +▁énormément 1 +▁ingrédients 1 +assure 1 +arrangement 1 +absorption 1 +▁British 1 +▁Finalement 1 +▁cadeau 1 +▁travaillons 1 +excuse 1 +▁évoque 1 +▁devais 1 +▁géant 1 +▁insisté 1 +▁posséder 1 +▁salariale 1 +▁toxicité 1 +▁plateforme 1 +▁consigné 1 +systématiquement 1 +▁orienté 1 +▁inventé 1 +somm 1 +1994 1 +▁Modification 1 +▁2007-2008 1 +▁chèque 1 +▁incluent 1 +▁pandémie 1 +▁ponctuel 1 +▁étonnant 1 +default 1 +▁Syrie 1 +accompagn 1 +exposé 1 +ontrairement 1 +▁clarifier 1 +▁postsecondaire 1 +▁souviens 1 +▁Exemple 1 +▁trouvait 1 +SCIAN 1 +interroge 1 +▁assistant 1 +field 1 +▁Suivant 1 +explosion 1 +▁Parallèlement 1 +▁doctorat 1 +▁fantastique 1 +viendront 1 +▁connaissez 1 +▁connaissons 1 +▁émotions 1 +▁montrant 1 +▁Décennie 1 +▁modélisation 1 +▁poursuivra 1 +affichage 1 +▁venait 1 +▁ISO 1 +▁cherché 1 +cyclo 1 +▁fragile 1 +▁validation 1 +interpréter 1 +▁survivre 1 +▁vapeur 1 +▁Encore 1 +▁devriez 1 +▁décédé 1 +▁Disposition 1 +▁rentabilité 1 +▁verbale 1 +▁apparemment 1 +▁réjouis 1 +▁apprécier 1 +▁prenne 1 +▁philosophie 1 +▁récupérer 1 +▁consolidé 1 +▁visibilité 1 +▁centraux 1 +▁créature 1 +▁guidé 1 +▁Fournir 1 +▁aveugle 1 +empreinte 1 +▁auxiliaire 1 +injection 1 +▁exploitants 1 +▁littérature 1 +▁pouvions 1 
+▁simplifié 1 +▁quantitative 1 +▁neutre 1 +▁hebdomadaire 1 +▁octroyé 1 +accessible 1 +▁productive 1 +▁oublié 1 +Vidéo 1 +conseil 1 +dimension 1 +abondance 1 +▁Objet 1 +▁Zimbabwe 1 +▁relance 1 +▁Center 1 +▁modéré 1 +Église 1 +▁Coordonnateur 1 +▁piscine 1 +▁tactique 1 +altitude 1 +▁redressement 1 +▁linéaire 1 +▁vocation 1 +▁denrées 1 +▁estimons 1 +▁monument 1 +▁effectu 1 +▁Engagements 1 +▁Méthode 1 +▁Cliquez 1 +▁présidentielle 1 +▁Normes 1 +▁Petit 1 +▁calibre 1 +▁rédiger 1 +▁contesté 1 +▁fidèle 1 +▁minéraux 1 +▁aspirations 1 +▁publicitaire 1 +▁rentrer 1 +▁pauvre 1 +institut 1 +écriture 1 +▁corrigé 1 +▁détérioration 1 +▁magnétique 1 +▁Qatar 1 +▁Croix 1 +▁ultime 1 +▁geste 1 +▁chaussures 1 +▁Souligne 1 +▁comptait 1 +économiste 1 +Islande 1 +provincial 1 +▁Collège 1 +▁galaxie 1 +▁reproduire 1 +▁animé 1 +▁Mandat 1 +psych 1 +▁suspens 1 +▁essayons 1 +▁multinationale 1 +▁champignon 1 +▁Central 1 +▁survivant 1 +▁désertification 1 +▁facilitation 1 +▁instauré 1 +▁perturbation 1 +▁consultez 1 +▁Berlin 1 +▁Observations 1 +▁desservi 1 +▁brièvement 1 +▁brève 1 +▁libérer 1 +▁maïs 1 +▁démarrage 1 +aquaculture 1 +▁influencé 1 +▁macroéconomique 1 +▁déploie 1 +▁apparaissent 1 +ã 1 +▁cohérent 1 +▁nutritionnel 1 +▁Network 1 +▁précaution 1 +ï 1 +▁minéral 1 +gouvernement 1 +imagination 1 +▁Copenhague 1 +▁maintenance 1 +▁institué 1 +▁détient 1 +▁spatial 1 +▁souffrant 1 +▁précédant 1 +clamation 1 +▁Utilisation 1 +▁légitimité 1 +▁regroupé 1 +▁Business 1 +▁métallique 1 +▁diffuse 1 +enzyme 1 +ology 1 +▁Présidence 1 +▁réduisant 1 +▁affiliée 1 +▁Bolivie 1 +▁ramener 1 +impulsion 1 +▁compromettre 1 +développement 1 +Érythrée 1 +ˆ 1 +▁réclame 1 +apparence 1 +▁plonge 1 +▁spec 1 +▁couvrant 1 +▁renforçant 1 +▁unilatérale 1 +▁cheval 1 +▁dîner 1 +enjeu 1 +▁cancér 1 +▁usagers 1 +▁attire 1 +▁consécutive 1 +▁cérébral 1 +▁tunnel 1 +▁productif 1 +attrape 1 +# 1 +▁flexible 1 +▁puissions 1 +interruption 1 +▁jouissent 1 +▁convoqué 1 +▁demandait 1 +▁lycée 1 +▁contaminants 1 +▁pathogène 1 +▁Formulaire 1 +▁Conscient 1 +▁interculturel 1 +▁revendiqu 1 +▁refroidi 1 +▁pompe 1 +▁spectaculaire 1 +▁céréales 1 +▁imputable 1 +▁pensait 1 +eizième 1 +glyc 1 +▁dimanche 1 +▁festival 1 +▁baleine 1 +▁joie 1 +▁pâte 1 +▁CONSEIL 1 +▁PARTIE 1 +▁signataires 1 +▁transfrontalier 1 +▁Recueil 1 +▁Balkans 1 +▁recruter 1 +▁appartiennent 1 +▁biomasse 1 +▁contradiction 1 +▁soulignant 1 +▁Précédent 1 +ébauche 1 +▁chimie 1 +attestation 1 +▁Cameroun 1 +▁Noël 1 +▁Stephen 1 +▁attentivement 1 +▁superviseur 1 +obésité 1 +▁CANADIEN 1 +▁pharmacie 1 +▁terrasse 1 +▁coalition 1 +affection 1 +▁Liechtenstein 1 +▁engendre 1 +Instance 1 +▁quinze 1 +▁Amélioration 1 +▁congrès 1 +▁rembourser 1 +▁végétation 1 +▁Work 1 +amorce 1 +▁Windows 1 +▁douanier 1 +▁Pensez 1 +▁samedi 1 +▁entamé 1 +▁Stockholm 1 +▁félicitons 1 +▁musulmans 1 +▁rupture 1 +▁Black 1 +▁entourant 1 +▁Daniel 1 +▁pain 1 +▁ultérieur 1 +▁Facebook 1 +▁Women 1 +animation 1 +▁tuyau 1 +▁Engage 1 +▁poursuivent 1 +▁succession 1 +î 1 +▁entouré 1 +α 1 +▁merveilleuse 1 +▁Traitement 1 +▁novateur 1 +affiche 1 +▁bétail 1 +▁possédant 1 +▁victoire 1 +espérance 1 +ificateur 1 +étoile 1 +▁Enregistrement 1 +▁précurseur 1 +Occident 1 +▁acquitté 1 +▁navigable 1 +▁soufre 1 +▁Améliorer 1 +▁Cambodge 1 +▁onzième 1 +Histoire 1 +▁Strasbourg 1 +▁antidumping 1 +▁endommagé 1 +▁gagnant 1 +▁thérapeutiques 1 +▁semestre 1 +▁raciste 1 +instabilité 1 +▁honte 1 +▁RAPPORT 1 +▁frontaliers 1 +▁marquage 1 +▁préjudiciable 1 +▁stimulant 1 +▁trimestriel 1 +▁prévoyait 1 +▁clarté 1 +▁indigènes 1 +▁prospère 1 +▁achète 1 +émergence 1 +▁estimatif 1 +embryon 1 +▁Microsoft 1 
+▁Méditerranée 1 +▁immigré 1 +▁excédentaire 1 +▁injuste 1 +▁rassemblé 1 +▁Province 1 +▁soigneusement 1 +▁Modifier 1 +▁musicale 1 +▁améliorant 1 +▁municipaux 1 +▁règne 1 +▁Joseph 1 +▁représentaient 1 +▁procure 1 +conférence 1 +▁Mécanisme 1 +▁australien 1 +▁félicitant 1 +▁mammifères 1 +▁policière 1 +invasion 1 +▁gardien 1 +immigrant 1 +▁accrédité 1 +▁dégagé 1 +▁traduis 1 +▁terminant 1 +entrepôt 1 +▁rénovation 1 +▁réparer 1 +vingt 1 +▁battre 1 +éthane 1 +▁prudence 1 +▁désastre 1 +▁éclat 1 +▁portail 1 +▁persiste 1 +Traduction 1 +▁poumon 1 +privé 1 +▁pharmaco 1 +alphabétisation 1 +▁Transfert 1 +▁comprenait 1 +▁pilotage 1 +▁Changement 1 +▁Français 1 +▁Procédure 1 +▁insectes 1 +▁rapatriement 1 +optimiser 1 +▁sénateur 1 +▁grève 1 +` 1 +émetteur 1 +▁illégal 1 +▁divise 1 +▁OTTAWA 1 +▁fascinant 1 +évènement 1 +▁rapprochement 1 +▁refuge 1 +▁radiation 1 +Atelier 1 +▁Renforcement 1 +▁ambitieux 1 +▁certitude 1 +Alzheimer 1 +▁collaborateurs 1 +▁flagrant 1 +▁ambiant 1 +investigation 1 +Ouzbékistan 1 +▁Burkina 1 +▁Festival 1 +▁combustion 1 +autrui 1 +attirer 1 +▁législateur 1 +▁General 1 +▁amusant 1 +▁deviendra 1 +inconvénient 1 +× 1 +▁percevoir 1 +IFICATION 1 +▁Department 1 +▁aléatoire 1 +▁marchande 1 +▁résiduel 1 +▁télécharger 1 +▁Scott 1 +▁infligé 1 +▁dramatique 1 +▁nominal 1 +aluminium 1 +transfrontalière 1 +▁COMMISSION 1 +▁immunitaire 1 +écrivain 1 +▁Power 1 +anomalie 1 +▁ordonne 1 +▁Prestation 1 +▁Profil 1 +enlever 1 +Islam 1 +Ç 1 +Administrateur 1 +▁dénommé 1 +▁hectare 1 +▁coïncid 1 +▁Priorité 1 +ouillé 1 +▁dégâts 1 +▁portugais 1 +▁suspendre 1 +▁énumérées 1 +▁Mettr 1 +entamer 1 +▁salubrité 1 +▁tangible 1 +▁cheveux 1 +▁commandé 1 +▁Rights 1 +▁Surveillance 1 +▁synthétique 1 +▁jouir 1 +▁Session 1 +▁Visite 1 +▁Structure 1 +▁composite 1 +▁précédé 1 +▁meurent 1 +nouvelle 1 +figure 1 +▁Prairies 1 +▁défaillance 1 +▁automatisé 1 +▁HTML 1 +▁croyez 1 +▁nôtre 1 +7,5 1 +▁affaibli 1 +Ancien 1 +▁béton 1 +▁entretenu 1 +viendrait 1 +amitié 1 +avortement 1 +aggrave 1 +▁Situé 1 +▁favori 1 +éthanol 1 +▁Responsabilité 1 +▁gratitude 1 +▁prototype 1 +▁remboursé 1 +extinction 1 +▁Food 1 +▁Soixante 1 +▁imprévu 1 +▁rattaché 1 +▁colloque 1 +▁dividende 1 +▁patrouille 1 +▁Réaffirme 1 +▁ruisseau 1 +▁retire 1 +effectue 1 +Infrastructure 1 +analyste 1 +▁pétrolière 1 +▁remboursable 1 +▁reddition 1 +▁épuisé 1 +▁Classification 1 +▁Nicaragua 1 +▁Dossier 1 +▁favorisé 1 +irrégularité 1 +évacuation 1 +▁réciproque 1 +▁simplification 1 +Entreprise 1 +▁Airlines 1 +▁caution 1 +accumule 1 +▁contracté 1 +▁phoque 1 +insertion 1 +▁authentique 1 +▁semences 1 +▁prescription 1 +Amsterdam 1 +enthousiasme 1 +▁invisible 1 +▁représentait 1 +▁1999-2000 1 +▁Museum 1 +phényl 1 +▁tierce 1 +▁Métis 1 +apprenant 1 +Indien 1 +▁qualitative 1 +▁représentative 1 +▁spécificité 1 +▁consistait 1 +▁Olympique 1 +▁démobilisation 1 +▁persistance 1 +▁plongé 1 +▁Fraser 1 +▁cartographie 1 +▁Tchad 1 +▁Création 1 +▁anglophone 1 +▁empêché 1 +▁irlandais 1 +▁jouissance 1 +embargo 1 +Effect 1 +▁terminal 1 +▁Philip 1 +▁trajet 1 +▁ventilation 1 +▁permettait 1 +▁détecté 1 +▁thermo 1 +automobile 1 +▁doctrine 1 +▁subdivision 1 +′ 1 +ć 1 +о 1 +Ô 1 +ú 1 +¢ 1 +š 1 +č 1 +е 1 +а 1 +Å 1 +и 1 +β 1 +Ö 1 +ο 1 +Î 1 +À 1 +ø 1 +н 1 +т 1 +■ 1 +й 1 +÷ 1 +å 1 +Ž 1 +⁄ 1 +Á 1 +с 1 +ι 1 +ς 1 +р 1 +ν 1 +π 1 +σ 1 +“ 1 +τ 1 +æ 1 +в 1 +ε 1 +Œ 1 +ρ 1 +Š 1 +≤ 1 +√ 1 +Õ 1 +ß 1 +κ 1 +∗ 1 +л 1 +ل 1 +ž 1 +Δ 1 +ا 1 +£ 1 +ł 1 +≥ 1 +¡ 1 +ì 1 +м 1 +^ 1 +γ 1 +к 1 +Û 1 +→ 1 +¶ 1 +λ 1 +† 1 +η 1 +¿ 1 +ı 1 +ί 1 +д 1 +Ä 1 +С 1 +ý 1 +Ï 1 +δ 1 +у 1 +ό 1 +ي 1 +Ο 1 +п 1 +Ü 1 +● 1 +ù 1 +ò 1 +¤ 1 +› 1 +Ó 1 +ę 1 +ė 1 +я 1 +ş 1 
+ر 1 +õ 1 +Č 1 +ـ 1 +̄ 1 +ă 1 +‚ 1 +ÿ 1 +Π 1 +― 1 +ā 1 +、 1 +Ł 1 +б 1 +υ 1 +□ 1 +ы 1 +г 1 +ń 1 +ع 1 +θ 1 +ω 1 +ь 1 +م 1 +ت 1 +Τ 1 +Í 1 +► 1 +َ 1 +ą 1 +ī 1 +ة 1 +Σ 1 +ř 1 +Ñ 1 +ð 1 +ŕ 1 +ч 1 +„ 1 +Ë 1 +‡ 1 +ƒ 1 +ή 1 +ب 1 +ن 1 +ū 1 +د 1 +¥ 1 +ά 1 +─ 1 +ś 1 +ж 1 +ف 1 +و 1 +В 1 +¦ 1 +х 1 +⎯ 1 +ᑦ 1 +ق 1 +ż 1 +ц 1 +Ý 1 +Α 1 +ţ 1 +Ù 1 +Ú 1 +Ђ 1 +‹ 1 +χ 1 +ώ 1 +έ 1 +Þ 1 +ю 1 +Ş 1 +Р 1 +ň 1 +ύ 1 +ē 1 +ʼ 1 +Ÿ 1 +Æ 1 +ě 1 +ő 1 +ǫ 1 +ِ 1 +Ò 1 +ʹ 1 +Κ 1 +ᐅ 1 +Ε 1 +þ 1 +Ω 1 +ᐃ 1 +Ð 1 +ľ 1 +̊ 1 +أ 1 +⇒ 1 +ᒃ 1 +¬ 1 +٠ 1 +ų 1 +ш 1 +غ 1 +⌦ 1 +Μ 1 +ج 1 +ْ 1 +Φ 1 +ґ 1 +ك 1 +≠ 1 +◗ 1 +خ 1 +Ì 1 +ᖃ 1 +ᓄ 1 +١ 1 +٢ 1 +Т 1 +ض 1 +ᑐ 1 +∑ 1 +国 1 +Λ 1 +Ś 1 +➧ 1 +і 1 +ᑕ 1 +П 1 +· 1 +́ 1 +的 1 +ɔ 1 +ť 1 +Θ 1 +Ģ 1 +Ţ 1 +ź 1 +، 1 +س 1 +إ 1 +İ 1 +ّ 1 +ᓯ 1 +ᕐ 1 +ᑎ 1 +。 1 +法 1 +Γ 1 +ɛ 1 +Ă 1 +ɶ 1 +Ν 1 +⋅ 1 +М 1 +э 1 +ط 1 +亚 1 +会 1 +• 1 +尔 1 +利 1 +ъ 1 +ů 1 +Η 1 +ᓂ 1 +❒ 1 +Ő 1 +ʔ 1 +≡ 1 +ξ 1 +ф 1 +К 1 +ᓗ 1 +ᖅ 1 +大 1 +兰 1 +О 1 +Ċ 1 +ō 1 +ش 1 +ُ 1 +Ż 1 +ї 1 +ᓕ 1 +▲ 1 +✓ 1 +❏ 1 +〈 1 +글 1 +한 1 +А 1 +Н 1 +ᓇ 1 +ᓐ 1 +年 1 +和 1 +事 1 +٩ 1 +į 1 +ċ 1 +ζ 1 +Ќ 1 +ذ 1 +ħ 1 +Ľ 1 +ψ 1 +Ē 1 +Ğ 1 +ȇ 1 +̂ 1 +̱ 1 +Б 1 +ه 1 +٤ 1 +٥ 1 +ᒥ 1 +ᖓ 1 +※ 1 +← 1 +∞ 1 +❑ 1 +❚ 1 +อ 1 +ᕆ 1 +ز 1 +尼 1 +一 1 +提 1 +在 1 +不 1 +上 1 +各 1 +巴 1 +加 1 +ё 1 +ј 1 +٣ 1 +‟ 1 + 1 +Ġ 1 +ʺ 1 +ˇ 1 +̍ 1 +Ά 1 +٦ 1 +∼ 1 +❖ 1 +➟ 1 +Ń 1 +Д 1 +ᐊ 1 +ᖏ 1 +ᒪ 1 +ᓪ 1 +阿 1 +北 1 +拉 1 +了 1 +合 1 +关 1 +由 1 +编 1 +为 1 +机 1 +主 1 +并 1 +对 1 +布 1 +任 1 +制 1 +牙 1 +表 1 +塞 1 +斯 1 +ظ 1 +٨ 1 +ย 1 +ĕ 1 +ű 1 +ᓈ 1 +’ 1 +Ė 1 +Ρ 1 +ح 1 +ى 1 +พ 1 +ศ 1 +่ 1 +ᑭ 1 +ᒋ 1 +ᓚ 1 +ᔪ 1 +↓ 1 +└ 1 +ル 1 +ー 1 +比 1 +马 1 +년 1 +Ą 1 +И 1 +ص 1 +及 1 +联 1 +第 1 +日 1 +我 1 +介 1 +用 1 +于 1 +作 1 +出 1 +员 1 +要 1 +发 1 +式 1 +文 1 +致 1 +秘 1 +个 1 +书 1 +埃 1 +本 1 +俄 1 +伊 1 +罗 1 +鲁 1 +瓜 1 +Е 1 +ท 1 +∂ 1 +首 1 +ď 1 +­ 1 +ư 1 +̨ 1 +Й 1 +ث 1 +Ⴑ 1 +ᑲ 1 +ᒻ 1 +ᕗ 1 +ᖕ 1 +ờ 1 +↑ 1 +╚ 1 +╩ 1 +▬ 1 +➥ 1 +➾ 1 +サ 1 +下 1 +定 1 +报 1 +施 1 +民 1 +玩 1 +站 1 +章 1 +过 1 +通 1 +ћ 1 +ᕋ 1 +行 1 +乌 1 +西 1 +克 1 +以 1 +与 1 +所 1 +面 1 +高 1 +其 1 +力 1 +♫ 1 +诠 1 +译 1 +理 1 +犯 1 +爱 1 +权 1 +律 1 +弃 1 +号 1 +厅 1 +做 1 +伯 1 +众 1 +中 1 +☐ 1 +ệ 1 +ᒧ 1 +ᑯ 1 +ᐱ 1 +ร 1 +Э 1 +Џ 1 +ƴ 1 +Ř 1 +ļ 1 +Ě 1 +票 1 +死 1 +刑 1 +❍ 1 +ế 1 +З 1 +̇ 1 +ʾ 1 +Ů 1 +ķ 1 +Đ 1 +规 1 +务 1 +则 1 +决 1 +・ 1 +写 1 +官 1 +必 1 +持 1 +搁 1 +耽 1 +Љ 1 +Ѓ 1 +Ι 1 +◊ 1 +适 1 +ᕕ 1 +Β 1 +Ћ 1 +Υ 1 +套 1 +工 1 +料 1 +材 1 +现 1 +绍 1 +ȣ 1 +议 1 +✤ 1 +Њ 1 +他 1 +免 1 +避 1 +ƙ 1 +≈ 1 +ᓴ 1 +◆ 1 +们 1 +供 1 +保 1 +前 1 +口 1 +另 1 +外 1 +头 1 +府 1 +政 1 +正 1 +确 1 +简 1 +语 1 +щ 1 +̃ 1 +✔ 1 +ў 1 +公 1 +处 1 +有 1 +构 1 +组 1 +织 1 +间 1 +đ 1 +⌫ 1 +匈 1 +卡 1 +厄 1 +古 1 +坦 1 +基 1 +果 1 +萄 1 +葡 1 +◄ 1 +买 1 +印 1 +塔 1 +多 1 +安 1 +度 1 +廷 1 +德 1 +意 1 +朗 1 +根 1 +纳 1 +芬 1 +道 1 +∆ 1 +ѓ 1 +执 1 +昨 1 +被 1 +І 1 +Ф 1 +ต 1 +า 1 +ิ 1 +์ 1 +仁 1 +今 1 +席 1 +毛 1 +纪 1 +ϊ 1 +Л 1 +向 1 +念 1 +Į 1 +ǎ 1 +ǐ 1 +ữ 1 +赞 1 +| 1 + 1 +Ā 1 +Ę 1 +Ħ 1 +Ī 1 +ĺ 1 +Ļ 1 +Ņ 1 +ņ 1 +Ň 1 +Ō 1 +Ū 1 +Ų 1 +ŷ 1 +ơ 1 +Ƴ 1 +ɲ 1 +̀ 1 +̆ 1 +̐ 1 +̓ 1 +Ψ 1 +Ч 1 +Ш 1 +Ъ 1 +ғ 1 +ұ 1 +ט 1 +ء 1 +ً 1 +ٱ 1 +प 1 +म 1 +र 1 +े 1 +् 1 +ง 1 +ถ 1 +น 1 +ว 1 +ั 1 +ี 1 +ึ 1 +ᄅ 1 +ᐸ 1 +ᑖ 1 +ᒫ 1 +ᓅ 1 +ᓖ 1 +ᔨ 1 +ᔭ 1 +ᔾ 1 +ᕈ 1 +ᕌ 1 +ᕙ 1 +Ẕ 1 +ạ 1 +ầ 1 +ẹ 1 +ọ 1 +ớ 1 +ứ 1 +‛ 1 +↔ 1 +↵ 1 +∕ 1 +∫ 1 +⊂ 1 +⊕ 1 +⌃ 1 +⌘ 1 +⌠ 1 +⌧ 1 +★ 1 +☺ 1 +♀ 1 +♂ 1 +✕ 1 +✜ 1 +✢ 1 +➔ 1 +➢ 1 +➪ 1 +➯ 1 +『 1 +』 1 +【 1 +】 1 +の 1 +ア 1 +オ 1 +カ 1 +ス 1 +ド 1 +レ 1 +ン 1 +举 1 +予 1 +五 1 +付 1 +伏 1 +伝 1 +你 1 +例 1 +先 1 +八 1 +共 1 +典 1 +内 1 +况 1 +刘 1 +刚 1 +動 1 +午 1 +南 1 +去 1 +反 1 +台 1 +吉 1 +告 1 +商 1 +园 1 +圆 1 +坚 1 +堂 1 +如 1 +实 1 +希 1 +干 1 +开 1 +录 1 +形 1 +情 1 +成 1 +拜 1 +放 1 +晚 1 +智 1 +東 1 +架 1 +格 1 +案 1 +梶 1 +次 1 +气 1 +沙 1 +津 1 +浦 1 +添 1 +澳 1 +王 1 +球 1 +瑞 1 +産 1 +疆 1 +线 1 +美 1 +群 1 +耀 1 +肯 1 +腊 1 +葱 1 +见 1 +記 1 +记 1 +贝 1 +达 1 +运 1 +迪 1 +送 1 +達 1 +邦 1 +防 1 +院 1 +隆 1 +Ć 1 +明 1 +ᒐ 1 +パ 1 +リ 1 +山 1 +ϋ 1 
+ᑑ 1 +ᖑ 1 +危 1 +地 1 +є 1 +ѕ 1 +љ 1 +京 1 +名 1 +复 1 +最 1 +核 1 +经 1 +丹 1 +列 1 +宁 1 +来 1 +桑 1 +泊 1 +特 1 +荷 1 +莫 1 +▼ 1 +━ 1 +φ 1 +方 1 +Є 1 +➤ 1 +ğ 1 +з 1 +参 1 +将 1 +按 1 +授 1 +新 1 +是 1 +更 1 +活 1 +照 1 +综 1 +○ 1 +∙ 1 +̧ 1 +♦ 1 +动 1 +努 1 +委 1 +尤 1 +效 1 +率 1 +Ё 1 +♪ 1 +̈ 1 +▪ 1 +◦ 1 +月 1 + 1 diff --git a/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/spm_unigram10000.model b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/spm_unigram10000.model new file mode 100644 index 0000000000000000000000000000000000000000..e603ff160ae2523547a835529f54541a5ef07f2f --- /dev/null +++ b/SpeechT5/SpeechUT/dataset/MuSTC/en_fr/spm_unigram10000.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a9fa64123372b685dce1617ab4c6af34cdf1f022b2c718b859899dfefcdc42 +size 410452 diff --git a/SpeechT5/SpeechUT/speechut/__init__.py b/SpeechT5/SpeechUT/speechut/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..97327d269e93a13cd135f6c1a187fd820a8decb8 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/__init__.py @@ -0,0 +1 @@ +from . import data, tasks, criterions, models diff --git a/SpeechT5/SpeechUT/speechut/config/finetune_asr/speechut_base_100h.yaml b/SpeechT5/SpeechUT/speechut/config/finetune_asr/speechut_base_100h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..736c3c72b9a7ba85eacaf44e1952fa7f0fc15a4f --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/config/finetune_asr/speechut_base_100h.yaml @@ -0,0 +1,101 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 100 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 1 + keep_last_epochs: 1 + keep_best_checkpoints: 5 + best_checkpoint_metric: dec_accuracy + maximize_best_checkpoint_metric: true + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: 1 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: joint_sc2t_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: false # must be consistent with pre-training + labels: ["ltr"] + store_labels: true + single_target: true + add_decoder_target: true + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 1300000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_100 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc_ce + zero_infinity: true + +optimization: + max_update: 40000 + lr: [0.00001] + sentence_avg: true + update_freq: [2] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speechut_asr + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + add_decoder: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
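+      # NOTE: every ??? placeholder in this config (run/sweep dirs, task.data, task.label_dir, model.w2v_path) is a mandatory Hydra/OmegaConf value and must be overridden on the command line when launching fine-tuning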
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/SpeechUT/speechut/config/finetune_asr/speechut_large_100h.yaml b/SpeechT5/SpeechUT/speechut/config/finetune_asr/speechut_large_100h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7cbc59e61f10ab00b997286d6355f22ce1008677 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/config/finetune_asr/speechut_large_100h.yaml @@ -0,0 +1,102 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 100 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 1 + keep_last_epochs: 5 + keep_best_checkpoints: 5 + best_checkpoint_metric: dec_accuracy + maximize_best_checkpoint_metric: true + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: 16 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: joint_sc2t_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: true # must be consistent with pre-training + labels: ["ltr"] + store_labels: true + single_target: true + add_decoder_target: true + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 1300000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_100 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc_ce + zero_infinity: true + +optimization: + max_update: 40000 + lr: [0.00001] + sentence_avg: true + update_freq: [2] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speechut_asr + w2v_path: ??? + apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + add_decoder: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/SpeechUT/speechut/config/finetune_asr/speechut_large_960h.yaml b/SpeechT5/SpeechUT/speechut/config/finetune_asr/speechut_large_960h.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f10d6002555e5cbcfbf31035d8258e77abc26050 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/config/finetune_asr/speechut_large_960h.yaml @@ -0,0 +1,100 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 100 + tensorboard_logdir: tblog + +checkpoint: + save_interval: 1 + keep_last_epochs: 5 + keep_best_checkpoints: 5 + best_checkpoint_metric: dec_accuracy + maximize_best_checkpoint_metric: true + restore_file: checkpoint_last.pt + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: 24 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: joint_sc2t_pretraining + data: ??? + fine_tuning: true + label_dir: ??? 
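+  # label_dir typically holds the letter-level targets ({train_960,dev_other}.ltr) matching labels: ["ltr"] below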
+ normalize: true # must be consistent with pre-training + labels: ["ltr"] + store_labels: true + single_target: true + add_decoder_target: true + pad_audio: false + random_crop: true + hubert_tokenizer: "none" + sp_path: None + +dataset: + num_workers: 0 + max_tokens: 1300000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_960 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc_ce + zero_infinity: true + +optimization: + max_update: 40000 + lr: [0.00001] + sentence_avg: true + update_freq: [2] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: speechut_asr + w2v_path: ??? + apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.25 + mask_channel_length: 64 + layerdrop: 0.0 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + add_decoder: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/SpeechUT/speechut/config/pretrain/speechut_base_librispeech.yaml b/SpeechT5/SpeechUT/speechut/config/pretrain/speechut_base_librispeech.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a3751febf2efc3cbf7a91e3a75f05b570559f2c --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/config/pretrain/speechut_base_librispeech.yaml @@ -0,0 +1,153 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 4 + keep_last_epochs: 4 + save_interval_updates: 50000 + keep_interval_updates: -1 + keep_interval_updates_pattern: 50000 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_port: -1 + distributed_world_size: 32 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + store_labels: true + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: false # must be consistent with extractor + add_decoder_target: true + text_cfg: + seed: ${common.seed} + text_data: ??? + data_config: config.yaml + sample_break_mode: eos + tokens_per_sample: 1024 + shorten_method: "random_crop" + text_maxtokens_ratio: 1.5 + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 1 + +criterion: + _name: speechut_criterion + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + label_smoothing: 0.1 + u2t_ed_weight: 0.1 + u2t_ctc_weight: 0.1 + text_mum_weight: 0.5 + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: speechut + label_rate: ??? 
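+  # label_rate is the frame rate (labels per second) of the hidden-unit targets, e.g. 50 for k-means units extracted every 20 ms; task.label_rate above is tied to this value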
+ skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + activation_fn: "gelu" + encoder_layers: 6 + encoder_attention_heads: 8 + encoder_layerdrop: 0.0 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + add_unit_encoder: true + add_text_ctc: true + mask_u2t: false + mix_with_unit: true + add_decoder: true + reset_decoder_embedding_config: true + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + max_source_positions: 3000 + max_target_positions: 3000 + no_scale_embedding: true + layernorm_embedding: true + no_token_positional_embeddings: false + share_decoder_input_output_embed: false + encoder: + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 8 + normalize_before: false + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + decoder: + layerdrop: 0.1 + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 12 + normalize_before: false + learned_pos: false + output_dim: 768 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/SpeechUT/speechut/config/pretrain/speechut_large_librilight.yaml b/SpeechT5/SpeechUT/speechut/config/pretrain/speechut_large_librilight.yaml new file mode 100644 index 0000000000000000000000000000000000000000..849c1d986126f6e26f3e10feb14fae0a299be4b4 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/config/pretrain/speechut_large_librilight.yaml @@ -0,0 +1,159 @@ +# @package _group_ + +common: + fp16: true + fp16_scale_tolerance: 0.1 # alleviate fp16 overflow issue + log_format: json + log_interval: 200 + seed: 1234 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 1 + keep_last_epochs: 4 + save_interval_updates: 10000 + keep_interval_updates: -1 + keep_interval_updates_pattern: 10000 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_port: -1 + distributed_world_size: 128 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: joint_sc2t_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + store_labels: true + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: true # must be consistent with extractor + add_decoder_target: true + text_cfg: + seed: ${common.seed} + text_data: ??? 
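+    # text_data (a required override, like the other ??? fields) points to the binarized text/unit data consumed by the text-side objectives weighted below in the criterion (u2t_ed_weight, u2t_ctc_weight, text_mum_weight)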
+ data_config: config.yaml + sample_break_mode: eos + tokens_per_sample: 1024 + shorten_method: "random_crop" + text_maxtokens_ratio: 1.4 + +dataset: + num_workers: 6 + max_tokens: 900000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 2 + +criterion: + _name: speechut_criterion + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + label_smoothing: 0.1 + u2t_ed_weight: 0.1 + u2t_ctc_weight: 0.1 + text_mum_weight: 0.5 + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 1.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + end_learning_rate: 0.00015 # for future longger pre-training, e.g. 600K step + +model: + _name: speechut + label_rate: ??? + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: layer_norm + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 768 + activation_fn: "gelu" + encoder_layers: 12 + encoder_attention_heads: 16 + encoder_layerdrop: 0.0 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + layer_norm_first: true + feature_grad_mult: 1.0 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + add_unit_encoder: true + add_text_ctc: true + mask_u2t: false + mix_with_unit: true + add_decoder: true + reset_decoder_embedding_config: true + scaling_for_att: 32 # alleviate fp16 overflow issue + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + max_source_positions: 3000 + max_target_positions: 3000 + no_scale_embedding: true + layernorm_embedding: true + no_token_positional_embeddings: true + share_decoder_input_output_embed: false + encoder: + embed_dim: 1024 + ffn_embed_dim: 4096 + layers: 12 + attention_heads: 16 + normalize_before: false + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + decoder: + layerdrop: 0.1 + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 12 + normalize_before: false + learned_pos: false + output_dim: 768 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/SpeechUT/speechut/criterions/__init__.py b/SpeechT5/SpeechUT/speechut/criterions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2bf9fac9a8c00d76decd07417d86a2625c4c851c --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/criterions/__init__.py @@ -0,0 +1,9 @@ +import importlib +import os + +for file in os.listdir(os.path.dirname(__file__)): + if file.endswith(".py") and not file.startswith("_"): + criterion_name = file[: file.find(".py")] + importlib.import_module( + "speechut.criterions." 
+ criterion_name + ) diff --git a/SpeechT5/SpeechUT/speechut/criterions/ctc_ce.py b/SpeechT5/SpeechUT/speechut/criterions/ctc_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..aab6c9d23ac3b7dc410704bcba8982a697a57656 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/criterions/ctc_ce.py @@ -0,0 +1,414 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import math +from argparse import Namespace +from dataclasses import dataclass, field +from omegaconf import II +from typing import Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.dataclass import FairseqDataclass +from fairseq.data.data_utils import post_process +from fairseq.tasks import FairseqTask +from fairseq.logging.meters import safe_round + + +@dataclass +class CtcCeCriterionConfig(FairseqDataclass): + zero_infinity: bool = field( + default=False, + metadata={"help": "zero inf loss when source length <= target length"}, + ) + sentence_avg: bool = II("optimization.sentence_avg") + post_process: str = field( + default="letter", + metadata={ + "help": "how to post process predictions into words. can be letter, " + "wordpiece, BPE symbols, etc. 
" + "See fairseq.data.data_utils.post_process() for full list of options" + }, + ) + wer_kenlm_model: Optional[str] = field( + default=None, + metadata={ + "help": "if this is provided, use kenlm to compute wer (along with other wer_* args)" + }, + ) + wer_lexicon: Optional[str] = field( + default=None, + metadata={"help": "lexicon to use with wer_kenlm_model"}, + ) + wer_lm_weight: float = field( + default=2.0, + metadata={"help": "lm weight to use with wer_kenlm_model"}, + ) + wer_word_score: float = field( + default=-1.0, + metadata={"help": "lm word score to use with wer_kenlm_model"}, + ) + + wer_args: Optional[str] = field( + default=None, + metadata={ + "help": "DEPRECATED: tuple of (wer_kenlm_model, wer_lexicon, wer_lm_weight, wer_word_score)" + }, + ) + + dec_weight: float = field( + default=0.5, + metadata={"help": "weights for decoder CE Loss, loss will be ((1 - dec_weight) * hubert_loss + dec_weight * CE_Loss)"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + label_smoothing: float = field( + default=0.1, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + + +@register_criterion("ctc_ce", dataclass=CtcCeCriterionConfig) +class CtcCeCriterion(FairseqCriterion): + def __init__(self, cfg: CtcCeCriterionConfig, task: FairseqTask): + super().__init__(task) + self.blank_idx = ( + task.target_dictionary.index(task.blank_symbol) + if hasattr(task, "blank_symbol") + else 0 + ) + self.pad_idx = task.target_dictionary.pad() + self.eos_idx = task.target_dictionary.eos() + self.post_process = cfg.post_process + + if cfg.wer_args is not None: + ( + cfg.wer_kenlm_model, + cfg.wer_lexicon, + cfg.wer_lm_weight, + cfg.wer_word_score, + ) = eval(cfg.wer_args) + + if cfg.wer_kenlm_model is not None: + from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder + + dec_args = Namespace() + dec_args.nbest = 1 + dec_args.criterion = "ctc" + dec_args.kenlm_model = cfg.wer_kenlm_model + dec_args.lexicon = cfg.wer_lexicon + dec_args.beam = 50 + dec_args.beam_size_token = min(50, len(task.target_dictionary)) + dec_args.beam_threshold = min(50, len(task.target_dictionary)) + dec_args.lm_weight = cfg.wer_lm_weight + dec_args.word_score = cfg.wer_word_score + dec_args.unk_weight = -math.inf + dec_args.sil_weight = 0 + + self.w2l_decoder = W2lKenLMDecoder(dec_args, task.target_dictionary) + else: + self.w2l_decoder = None + + self.zero_infinity = cfg.zero_infinity + self.sentence_avg = cfg.sentence_avg + + self.dec_weight = cfg.dec_weight + self.report_accuracy = cfg.report_accuracy + self.ignore_prefix_size = cfg.ignore_prefix_size + self.eps = cfg.label_smoothing + + def forward(self, model, sample, reduce=True): + net_output = model(**sample["net_input"]) + lprobs = model.get_normalized_probs( + net_output, log_probs=True + ).contiguous() # (T, B, C) from the encoder + + if "src_lengths" in sample["net_input"]: + input_lengths = sample["net_input"]["src_lengths"] + else: + if net_output["padding_mask"] is not None: + non_padding_mask = ~net_output["padding_mask"] + input_lengths = non_padding_mask.long().sum(-1) + else: + input_lengths = lprobs.new_full( + (lprobs.size(1),), lprobs.size(0), dtype=torch.long + ) + + pad_mask = (sample["target"] != self.pad_idx) & ( + sample["target"] != self.eos_idx + ) + targets_flat = sample["target"].masked_select(pad_mask) + if "target_lengths" in sample: + 
target_lengths = sample["target_lengths"] + else: + target_lengths = pad_mask.sum(-1) + + with torch.backends.cudnn.flags(enabled=False): + loss = F.ctc_loss( + lprobs, + targets_flat, + input_lengths, + target_lengths, + blank=self.blank_idx, + reduction="sum", + zero_infinity=self.zero_infinity, + ) + + ntokens = ( + sample["ntokens"] if "ntokens" in sample else target_lengths.sum().item() + ) + + sample_size = sample["target"].size(0) if self.sentence_avg else ntokens + + logging_output = {} + if "decoder_target" in sample: + if net_output["decoder_out"] is not None: + dec_sample_size = sample["target"].size(0) if self.sentence_avg else sample["dec_ntokens"] + dec_loss, dec_nll_loss = self.compute_ce_loss(model, net_output["decoder_out"], sample, reduce=reduce) + logging_output["ctc_loss"] = loss.item() + loss = (1 - self.dec_weight) * loss + (self.dec_weight * dec_loss * sample_size / dec_sample_size) + logging_output["dec_loss"] = dec_loss.item() + logging_output["dec_nll_loss"] = dec_nll_loss.item() + logging_output["dec_sample_size"] = dec_sample_size + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output["decoder_out"], sample) + logging_output["dec_n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + else: + logging_output["ctc_loss"] = loss.item() + loss = (1 - self.dec_weight) * loss + logging_output["dec_loss"] = 0 + logging_output["dec_nll_loss"] = 0 + logging_output["dec_sample_size"] = 1 + if self.report_accuracy: + logging_output["dec_n_correct"] = 0 + logging_output["total"] = 1 + + logging_output = { + "loss": utils.item(loss.data), # * sample['ntokens'], + "ntokens": ntokens, + "nsentences": sample["id"].numel(), + "sample_size": sample_size, + **logging_output, + } + + if not model.training and self.dec_weight < 1.0: + import editdistance + + with torch.no_grad(): + lprobs_t = lprobs.transpose(0, 1).float().contiguous().cpu() + + c_err = 0 + c_len = 0 + w_errs = 0 + w_len = 0 + wv_errs = 0 + for lp, t, inp_l in zip( + lprobs_t, + sample["target_label"] + if "target_label" in sample + else sample["target"], + input_lengths, + ): + lp = lp[:inp_l].unsqueeze(0) + + decoded = None + if self.w2l_decoder is not None: + decoded = self.w2l_decoder.decode(lp) + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + + p = (t != self.task.target_dictionary.pad()) & ( + t != self.task.target_dictionary.eos() + ) + targ = t[p] + targ_units = self.task.target_dictionary.string(targ) + targ_units_arr = targ.tolist() + + toks = lp.argmax(dim=-1).unique_consecutive() + pred_units_arr = toks[toks != self.blank_idx].tolist() + + c_err += editdistance.eval(pred_units_arr, targ_units_arr) + c_len += len(targ_units_arr) + + targ_words = post_process(targ_units, self.post_process).split() + + pred_units = self.task.target_dictionary.string(pred_units_arr) + pred_words_raw = post_process(pred_units, self.post_process).split() + + if decoded is not None and "words" in decoded: + pred_words = decoded["words"] + w_errs += editdistance.eval(pred_words, targ_words) + wv_errs += editdistance.eval(pred_words_raw, targ_words) + else: + dist = editdistance.eval(pred_words_raw, targ_words) + w_errs += dist + wv_errs += dist + + w_len += len(targ_words) + + logging_output["wv_errors"] = wv_errs + logging_output["w_errors"] = w_errs + logging_output["w_total"] = w_len + logging_output["c_errors"] = c_err + logging_output["c_total"] = c_len + + 
return loss, sample_size, logging_output + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.pad_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.pad_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = sample["decoder_target"] + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs)) + nsentences = utils.item( + sum(log.get("nsentences", 0) for log in logging_outputs) + ) + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar("ntokens", ntokens) + metrics.log_scalar("nsentences", nsentences) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + + c_errors = sum(log.get("c_errors", 0) for log in logging_outputs) + metrics.log_scalar("_c_errors", c_errors) + c_total = sum(log.get("c_total", 0) for log in logging_outputs) + metrics.log_scalar("_c_total", c_total) + w_errors = sum(log.get("w_errors", 0) for log in logging_outputs) + metrics.log_scalar("_w_errors", w_errors) + wv_errors = sum(log.get("wv_errors", 0) for log in logging_outputs) + metrics.log_scalar("_wv_errors", wv_errors) + w_total = sum(log.get("w_total", 0) for log in logging_outputs) + metrics.log_scalar("_w_total", w_total) + + if c_total > 0: + metrics.log_derived( + "uer", + lambda meters: safe_round( + meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3 + ) + if meters["_c_total"].sum > 0 + else float("nan"), + ) + if w_total > 0: + metrics.log_derived( + "wer", + lambda meters: safe_round( + meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + metrics.log_derived( + "raw_wer", + lambda meters: safe_round( + meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + + if "dec_loss" in logging_outputs[0]: + ctc_loss_sum = sum(log.get("ctc_loss", 0) for log in logging_outputs) + dec_loss_sum = sum(log.get("dec_loss", 0) for log in logging_outputs) + dec_nll_loss_sum = sum(log.get("dec_nll_loss", 0) for log in logging_outputs) + dec_sample_size = sum(log.get("dec_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "dec_loss", dec_loss_sum / dec_sample_size / 
math.log(2), dec_sample_size, round=3 + ) + metrics.log_scalar( + "ctc_loss", ctc_loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar( + "dec_nll_loss", dec_nll_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_derived( + "dec_ppl", lambda meters: utils.get_perplexity(meters["dec_nll_loss"].avg) + ) + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("dec_n_correct", n_correct) + metrics.log_derived( + "dec_accuracy", + lambda meters: round( + meters["dec_n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/SpeechT5/SpeechUT/speechut/criterions/speechut_criterion.py b/SpeechT5/SpeechUT/speechut/criterions/speechut_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..0d735f1efd16aebf4146e26d5a5ebaeca2516ad7 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/criterions/speechut_criterion.py @@ -0,0 +1,384 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import math +import re +from dataclasses import dataclass, field +from typing import List, Optional + +import numpy as np +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.dataclass import FairseqDataclass + +logger = logging.getLogger(__name__) + +@dataclass +class SpeechUTCriterionConfig(FairseqDataclass): + pred_masked_weight: float = field( + default=1.0, + metadata={"help": "weight for predictive loss for masked frames"}, + ) + pred_nomask_weight: float = field( + default=0.0, + metadata={"help": "weight for predictive loss for unmasked frames"}, + ) + loss_weights: Optional[List[float]] = field( + default=None, + metadata={"help": "weights for additional loss terms (not first one)"}, + ) + log_keys: List[str] = field( + default_factory=lambda: [], + metadata={"help": "output keys to log"}, + ) + u2t_ed_weight: float = field( + default=0.1, + metadata={"help": "weights for text ED Loss, loss will be (hubert_loss + text_mum_weight * MUM_Loss + u2t_ed_weight * CE_Loss + u2t_ctc_weight * CTC_loss)"}, + ) + u2t_ctc_weight: float = field( + default=0.0, + metadata={"help": "weights for text ED Loss, loss will be (hubert_loss + text_mum_weight * MUM_Loss + u2t_ed_weight * CE_Loss + u2t_ctc_weight * CTC_loss)"}, + ) + text_mum_weight: float = field( + default=0.0, + metadata={"help": 
"masked unit modeling weight from the text end"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + label_smoothing: float = field( + default=0.0, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + no_ctc_blank: bool = field( + default=False, + metadata={"help": "mask out the blank of ctc, only when dec_loss_type=ctc"}, + ) + label_smoothing: float = field( + default=0.0, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + +@register_criterion("speechut_criterion", dataclass=SpeechUTCriterionConfig) +class SpeechUTCriterion(FairseqCriterion): + def __init__( + self, + task, + pred_masked_weight, + pred_nomask_weight, + loss_weights=None, + log_keys=None, + u2t_ed_weight=0.1, + u2t_ctc_weight=0, + text_mum_weight=0, + report_accuracy=False, + ignore_prefix_size=0, + label_smoothing=0, + no_ctc_blank=False, + ): + super().__init__(task) + self.pred_masked_weight = pred_masked_weight + self.pred_nomask_weight = pred_nomask_weight + self.loss_weights = loss_weights + self.log_keys = [] if log_keys is None else log_keys + self.u2t_ed_weight = u2t_ed_weight + self.u2t_ctc_weight = u2t_ctc_weight + self.text_mum_weight = text_mum_weight + self.report_accuracy = report_accuracy + self.ignore_prefix_size = ignore_prefix_size + self.eps = label_smoothing + self.no_ctc_blank = no_ctc_blank + self.padding_idx = task.dictionaries[0].pad() + self.eos_idx = task.dictionaries[0].eos() + self.blank_idx = task.dictionaries[0].bos() + + def compute_hubert_loss(self, model, net_output, reduction, preffix='', suffix=''): + loss = 0 + sample_size = [] + logging_output = {} + loss_m_list = [] + logp_m_list = model.get_logits(net_output, True) + targ_m_list = model.get_targets(net_output, True) + assert self.pred_masked_weight == 0 or len(logp_m_list) > 0 + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"{preffix}loss_m_{i}"] = loss_m.detach().item() + if self.pred_masked_weight > 0: + loss += self.pred_masked_weight * sum(loss_m_list) + sample_size.append(targ_m_list[0].numel()) + + loss_u_list = [] + logp_u_list = model.get_logits(net_output, False) + targ_u_list = model.get_targets(net_output, False) + assert self.pred_nomask_weight == 0 or len(logp_u_list) > 0 + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"{preffix}loss_u_{i}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss += self.pred_nomask_weight * sum(loss_u_list) + sample_size.append(targ_u_list[0].numel()) + + sample_size = np.mean(sample_size) + + def compute_correct(logits, targets): + if logits.numel() == 0: + return 0, 0 + else: + assert logits.dim() > 1, logits.shape + max = logits.argmax(-1) == targets + min = logits.argmin(-1) == targets + both = max & min + corr = max.long().sum().item() - both.long().sum().item() + count = max.numel() + return corr, count + + with torch.no_grad(): + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + corr_m, count_m = compute_correct(logp_m, targ_m) + logging_output[f"correct_m_{i}{suffix}"] = corr_m + logging_output[f"count_m_{i}{suffix}"] = count_m + + for i, (logp_u, targ_u) in 
enumerate(zip(logp_u_list, targ_u_list)): + corr_u, count_u = compute_correct(logp_u, targ_u) + logging_output[f"correct_u_{i}{suffix}"] = corr_u + logging_output[f"count_u_{i}{suffix}"] = count_u + + return loss, sample_size, logging_output + + + def forward(self, model, sample, reduce=True, log_pred=False): + """Compute the loss for the given sample. + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + reduction = "sum" if reduce else "none" + + if "net_input" in sample: + unit_sample = text_sample = None + else: + unit_sample = sample.get("text_mono", None) + text_sample = sample.get("text_paired", None) + assert unit_sample is not None or text_sample is not None + sample = sample.get("speech") + + ### 1. S2U: do hubert forward and loss computation + sample["modality"] = "speech" + net_output = model(target_list=sample["target_list"], **sample["net_input"]) + loss, sample_size, logging_output = self.compute_hubert_loss( + model, + net_output, + reduction, + ) + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len( + self.loss_weights + ), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss += p + logging_output[f"loss_{n}"] = p.item() + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + ### 2. do text U2T forward and loss computation + if text_sample is not None and (self.u2t_ctc_weight + self.u2t_ed_weight) > 0: + ## 2.1 re-loading "target_list", in default case, target_list = [src_tokens], + ## while in case of using "unit-phone-char" structure, target_list will be [ref_tokens] + text_sample["net_input"]["target_list"] = [ + text_sample.get("ref_tokens", text_sample["net_input"]["src_tokens"].clone()), + ] + text_net_output = model(**text_sample["net_input"]) + text_sample_size = text_sample["ntokens"] + + ### 2.1 U2T_UCTC + if self.u2t_ctc_weight > 0: + text_ctc_loss = self.compute_ctc_loss(model, text_net_output, text_sample["target"], reduction=reduction) + loss += self.u2t_ctc_weight * text_ctc_loss * sample_size / text_sample_size + logging_output["text_ctc_loss"] = utils.item(text_ctc_loss) + logging_output["text_sample_size"] = text_sample_size + + ### 2.2 U2T_ED + if self.u2t_ed_weight > 0: + text_dec_loss, text_dec_nll_loss = self.compute_ce_loss(model, text_net_output["decoder_out"], text_sample, reduce=reduce) + loss += self.u2t_ed_weight * text_dec_loss * sample_size / text_sample_size + logging_output["text_dec_loss"] = utils.item(text_dec_loss) + logging_output["text_dec_nll_loss"] = utils.item(text_dec_nll_loss) + logging_output["text_sample_size"] = text_sample_size + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, text_net_output["decoder_out"], text_sample) + logging_output["correct_text_dec"] = utils.item(n_correct.data) + logging_output["count_text_dec"] = utils.item(total.data) + + ### 3. 
do unit MUM forward and loss computation + if unit_sample is not None and self.text_mum_weight > 0: + src_tokens = unit_sample["net_input"]["src_tokens"] + target = unit_sample.get("target", None) + target = src_tokens.clone() if target is None else target + unit_net_output = model.forward_mum(src_tokens, target) + loss_num, sample_size_mum, logging_output_mum = self.compute_hubert_loss( + model, + unit_net_output, + reduction, + preffix="mum_", + suffix="_mum", + ) + loss += self.text_mum_weight * loss_num * sample_size / sample_size_mum + logging_output["unit_sample_size"] = sample_size_mum + logging_output.update(logging_output_mum) + + logging_output = { + "loss": utils.item(loss) if reduce else loss, + "ntokens": sample_size, + "nsentences": sample["id"].numel() + (text_sample["id"].numel() if text_sample is not None else 0), + "sample_size": sample_size, + **logging_output, + } + + return loss, sample_size, logging_output + + def compute_ctc_loss(self, model, net_output, target, reduction): + logits = net_output["encoder_out_ctc"][0] # (T, B, C) from the code-encoder + if self.no_ctc_blank: + ## set prob of to -inf + logits = logits.float() + logits[:, :, self.blank_idx] = -1000000.0 + + lprobs = F.log_softmax(logits.float(), dim=-1) + + encoder_padding_mask = net_output["encoder_padding_mask"][0] + non_padding_mask = ~encoder_padding_mask + input_lengths = non_padding_mask.long().sum(-1) + pad_mask = (target != self.padding_idx) & (target != self.eos_idx) + targets_flat = target.masked_select(pad_mask) + target_lengths = pad_mask.sum(-1) + + with torch.backends.cudnn.flags(enabled=False): + loss = F.ctc_loss( + lprobs, + targets_flat, + input_lengths, + target_lengths, + blank=self.blank_idx, + reduction=reduction, + zero_infinity=True, + ) + return loss + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.padding_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.padding_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = sample["target"] + + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training (copied from normal cross entropy).""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg) + ) + else: + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg) + ) + + counts = {} + for lk in logging_outputs[0].keys(): + if lk.startswith("count_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + 
metrics.log_scalar(lk, val) + counts[lk] = val + + for lk in logging_outputs[0].keys(): + if lk.startswith("loss_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / sample_size / math.log(2), round=3) + elif lk.startswith("correct_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / counts[re.sub("correct", "count", lk)]) + + if "text_sample_size" in logging_outputs[0]: + text_sample_size = sum(log.get("text_sample_size", 0) for log in logging_outputs) + for lk in logging_outputs[0].keys(): + if lk.startswith("text_") and lk.endswith("_loss"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / text_sample_size / math.log(2), round=3) + + if "unit_sample_size" in logging_outputs[0]: + unit_sample_size = sum(log.get("unit_sample_size", 0) for log in logging_outputs) + for lk in logging_outputs[0].keys(): + if lk.startswith("mum_loss_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / unit_sample_size / math.log(2), round=3) + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + raise NotImplementedError() + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return False diff --git a/SpeechT5/SpeechUT/speechut/data/concat_dataset.py b/SpeechT5/SpeechUT/speechut/data/concat_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..5766921ac39b571010b318e0d4b6f967cd21d96e --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/data/concat_dataset.py @@ -0,0 +1,129 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import bisect + +import numpy as np +from torch.utils.data.dataloader import default_collate + +from fairseq.data import FairseqDataset + + +class ConcatDataset(FairseqDataset): + @staticmethod + def cumsum(sequence, sample_ratios): + r, s = [], 0 + for e, ratio in zip(sequence, sample_ratios): + curr_len = int(ratio * len(e)) + r.append(curr_len + s) + s += curr_len + return r + + def __init__(self, datasets, sample_ratios=1): + super(ConcatDataset, self).__init__() + assert len(datasets) > 0, "datasets should not be an empty iterable" + self.datasets = list(datasets) + if isinstance(sample_ratios, int): + sample_ratios = [sample_ratios] * len(self.datasets) + self.sample_ratios = sample_ratios + self.cumulative_sizes = self.cumsum(self.datasets, sample_ratios) + self.real_sizes = [len(d) for d in self.datasets] + + def __len__(self): + return self.cumulative_sizes[-1] + + def __getitem__(self, idx): + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + return self.datasets[dataset_idx][sample_idx] + + def _get_dataset_and_sample_index(self, idx: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + sample_idx = sample_idx % self.real_sizes[dataset_idx] + return dataset_idx, sample_idx + + def collater(self, samples, **extra_args): + # For now only supports 
datasets with same underlying collater implementations + if hasattr(self.datasets[0], "collater"): + return self.datasets[0].collater(samples, **extra_args) + else: + return default_collate(samples, **extra_args) + + def size(self, idx: int): + """ + Return an example's size as a float or tuple. + """ + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + return self.datasets[dataset_idx].size(sample_idx) + + def num_tokens(self, index: int): + return np.max(self.size(index)) + + def attr(self, attr: str, index: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, index) + return getattr(self.datasets[dataset_idx], attr, None) + + @property + def sizes(self): + _dataset_sizes = [] + for ds, sr in zip(self.datasets, self.sample_ratios): + if isinstance(ds.sizes, np.ndarray): + _dataset_sizes.append(np.tile(ds.sizes, sr)) + else: + # Only support underlying dataset with single size array. + assert isinstance(ds.sizes, list) + _dataset_sizes.append(np.tile(ds.sizes[0], sr)) + return np.concatenate(_dataset_sizes) + + @property + def supports_prefetch(self): + return all(d.supports_prefetch for d in self.datasets) + + def ordered_indices(self): + """ + Returns indices sorted by length. So less padding is needed. + """ + if isinstance(self.sizes, np.ndarray) and len(self.sizes.shape) > 1: + # special handling for concatenating lang_pair_datasets + if getattr(self.datasets[0], "shuffle", False): + indices = np.random.permutation(len(self)).astype(np.int64) + else: + indices = np.arange(len(self), dtype=np.int64) + sizes = self.sizes + tgt_sizes = ( + sizes[:, 1] if len(sizes.shape) > 0 and sizes.shape[1] > 1 else None + ) + src_sizes = ( + sizes[:, 0] if len(sizes.shape) > 0 and sizes.shape[1] > 1 else sizes + ) + # sort by target length, then source length + if tgt_sizes is not None: + indices = indices[np.argsort(tgt_sizes[indices], kind="mergesort")] + return indices[np.argsort(src_sizes[indices], kind="mergesort")] + else: + return np.argsort(self.sizes) + + def prefetch(self, indices): + frm = 0 + for to, ds in zip(self.cumulative_sizes, self.datasets): + real_size = len(ds) + if getattr(ds, "supports_prefetch", False): + ds.prefetch([(i - frm) % real_size for i in indices if frm <= i < to]) + frm = to + + @property + def can_reuse_epoch_itr_across_epochs(self): + return all(d.can_reuse_epoch_itr_across_epochs for d in self.datasets) + + def set_epoch(self, epoch): + super().set_epoch(epoch) + for ds in self.datasets: + if hasattr(ds, "set_epoch"): + ds.set_epoch(epoch) diff --git a/SpeechT5/SpeechUT/speechut/data/hubert_dataset.py b/SpeechT5/SpeechUT/speechut/data/hubert_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..64965dea445a0a5afc63c887b1bc89cece0b203b --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/data/hubert_dataset.py @@ -0,0 +1,597 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import itertools +import logging +import io +import os +import sys +import time +from pathlib import Path +from typing import Any, List, Optional, Union, Tuple + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils, Dictionary +from fairseq.data.fairseq_dataset import FairseqDataset +from fairseq.data.audio.audio_utils import ( + read_from_stored_zip, 
+ is_sf_audio_data, +) + +FEATURE_OR_SF_AUDIO_FILE_EXTENSIONS = {".npy", ".wav", ".flac", ".ogg"} + +logger = logging.getLogger(__name__) + +def parse_path(path: str) -> Tuple[str, List[int]]: + """Parse data path which is either a path to + 1. a .npy/.wav/.flac/.ogg file + 2. a stored ZIP file with slicing info: "[zip_path]:[offset]:[length]" + + Args: + path (str): the data path to parse + + Returns: + file_path (str): the file path + slice_ptr (list of int): empty in case 1; + byte offset and length for the slice in case 2 + """ + + if Path(path).suffix in FEATURE_OR_SF_AUDIO_FILE_EXTENSIONS: + _path, slice_ptr = path, [] + else: + _path, *slice_ptr = path.split(":") + if not Path(_path).is_file(): + raise FileNotFoundError(f"File not found: {_path}") + assert len(slice_ptr) in {0, 1, 2}, f"Invalid path: {path}" + slice_ptr = [int(i) for i in slice_ptr] + return _path, slice_ptr + +def load_audio(manifest_path, max_keep, min_keep, retry_times=5): + n_long, n_short = 0, 0 + names, inds, sizes, chunk_names, chunk_indices = [], [], [], [], [] + for i in range(retry_times): + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + assert len(items) == 2, line + sz = int(items[1]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + fname = items[0].split(":") + if len(fname) > 2: + if len(chunk_names) == 0 or fname[0] != chunk_names[-1]: + chunk_names.append(fname[0]) + chunk_indices.append(len(names)) + names.append(items[0]) + inds.append(ind) + sizes.append(sz) + if len(names) == 0: + logger.warn(f"Fail to load manifest for the {i} time") + time.sleep(1) + continue + else: + break + tot = ind + 1 + logger.info( + ( + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, names, inds, tot, sizes, chunk_names, chunk_indices + + +def load_label(label_path, inds, tot, retry_times=5): + for i in range(retry_times): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + if len(labels) == 0: + logger.warn(f"Fail to load label for the {i} time") + time.sleep(1) + continue + else: + break + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds, tot, retry_times=5): + for i in range(retry_times): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + if len(code_lengths) == 0: + logger.warn(f"Fail to load label for the {i} time") + time.sleep(1) + continue + else: + break + assert ( + len(code_lengths) == tot + ), f"number of labels does not match ({len(code_lengths)} != {tot})" + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + +def verify_label_lengths( + audio_sizes, + audio_rate, + label_path, + label_rate, + inds, + tot, + tol=0.1, # tolerance in seconds +): + if label_rate < 0: + logger.info(f"{label_path} is sequence label. 
skipped") + return + + with open(label_path) as f: + lengths = [len(line.rstrip().split()) for line in f] + assert len(lengths) == tot + lengths = [lengths[i] for i in inds] + num_invalid = 0 + for i, ind in enumerate(inds): + dur_from_audio = audio_sizes[i] / audio_rate + dur_from_label = lengths[i] / label_rate + if abs(dur_from_audio - dur_from_label) > tol: + logger.warning( + ( + f"audio and label duration differ too much " + f"(|{dur_from_audio} - {dur_from_label}| > {tol}) " + f"in line {ind+1} of {label_path}. Check if `label_rate` " + f"is correctly set (currently {label_rate}). " + f"num. of samples = {audio_sizes[i]}; " + f"label length = {lengths[i]}" + ) + ) + num_invalid += 1 + if num_invalid > 0: + logger.warning( + f"total {num_invalid} (audio, label) pairs with mismatched lengths" + ) + + +class HubertDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + random_crop: bool = False, + single_target: bool = False, + tgt_dict: Optional[Dictionary] = None, + add_decoder_target: bool = False, + fine_tuning: bool = False, + tgt_lang_idx: int = None, + tokenizer = None, + mbart_style_lang_id: bool = False, + retry_times: int = 5, + reduce_label_for_dec: bool = True, + ): + self.audio_root, self.audio_names, inds, tot, self.wav_sizes, self.chunk_names, self.chunk_indices = load_audio( + manifest_path, max_keep_sample_size, min_keep_sample_size, retry_times + ) + self.sample_rate = sample_rate + self.shuffle = shuffle + self.random_crop = random_crop + self.tgt_dict = tgt_dict + self.add_decoder_target = add_decoder_target + self.fine_tuning = fine_tuning + + self.num_labels = len(label_paths) + self.pad_list = pad_list + self.eos_list = eos_list + self.label_processors = label_processors + self.single_target = single_target + self.epoch = 0 + + self.label_rates = ( + [label_rates for _ in range(len(label_paths))] + if isinstance(label_rates, int) + else label_rates + ) + self.store_labels = store_labels + if store_labels: + self.label_list = [load_label(p, inds, tot, retry_times) for p in label_paths] + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds, tot, retry_times) for p in label_paths + ] + assert label_processors is None or len(label_processors) == self.num_labels + for label_path, label_rate in zip(label_paths, self.label_rates): + verify_label_lengths( + self.wav_sizes, sample_rate, label_path, label_rate, inds, tot + ) + + self.max_sample_size = ( + max_sample_size if max_sample_size is not None else sys.maxsize + ) + self.pad_audio = pad_audio + self.normalize = normalize + self.tgt_lang_idx = tgt_lang_idx + self.tokenizer = tokenizer + self.mbart_style_lang_id = mbart_style_lang_id + self.retry_times = retry_times + self.reduce_label_for_dec = reduce_label_for_dec + logger.info( + f"pad_audio={pad_audio}, random_crop={random_crop}, tgt_lang_idx={self.tgt_lang_idx}, reduce_label_for_dec={reduce_label_for_dec}, " + f"mbart_style_lang_id={mbart_style_lang_id}, normalize={normalize}, max_sample_size={self.max_sample_size}" + ) + + def set_epoch(self, epoch): + 
self.epoch = epoch + + def batch_by_size(self, indices, max_tokens=None, max_sentences=None, required_batch_size_multiple=1): + self.max_tokens = max_tokens + self.max_sentences = max_sentences + self.required_batch_size_multiple = required_batch_size_multiple + if isinstance(indices[0], np.ndarray): + batch_list = [] + for indice in indices: + batch = super(HubertDataset, self).batch_by_size(indice, max_tokens, max_sentences, required_batch_size_multiple) + batch_list.append(batch) + return batch_list + else: + return super(HubertDataset, self).batch_by_size(indices, max_tokens, max_sentences, required_batch_size_multiple) + def shuffle_batches(self, batches, seed): + if isinstance(batches[0], list): + new_batches = [] + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + for batch in batches: + np.random.shuffle(batch) + new_batches.extend(batch) + return new_batches + else: + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + return batches + + def get_audio(self, index): + import soundfile as sf + + wav_path = os.path.join(self.audio_root, self.audio_names[index]) + _path, slice_ptr = parse_path(wav_path) + if len(slice_ptr) == 1: + import kaldiio + feat = kaldiio.load_mat(wav_path) + feat = torch.from_numpy(feat).float() + if self.normalize: + with torch.no_grad(): + feat = F.layer_norm(feat, feat.shape[-1]) + return feat + else: + if len(slice_ptr) == 2: + byte_data = read_from_stored_zip(_path, slice_ptr[0], slice_ptr[1]) + assert is_sf_audio_data(byte_data) + wav_path = io.BytesIO(byte_data) + for i in range(self.retry_times): + if i < self.retry_times - 1: + try: + wav, cur_sample_rate = sf.read(wav_path) + break + except Exception as e: + logger.warn(f"Fail to load wav for the {i} time") + logger.warn(e) + time.sleep(1) + continue + else: + wav, cur_sample_rate = sf.read(wav_path) + + wav = torch.from_numpy(wav).float() + wav = self.postprocess(wav, cur_sample_rate) + return wav + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.tokenizer is not None and self.fine_tuning: + label = self.tokenizer.encode(label) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + def __getitem__(self, index): + wav = self.get_audio(index) + labels = self.get_labels(index) + return {"id": index, "source": wav, "label_list": labels} + + def __len__(self): + return len(self.wav_sizes) + + def crop_to_max_size(self, wav, target_size): + size = len(wav) + diff = size - target_size + if diff <= 0: + return wav, 0 + + start, end = 0, target_size + if self.random_crop: + start = np.random.randint(0, diff + 1) + end = size - diff + start + return wav[start:end], start + + def collater(self, samples): + # target = max(sizes) -> random_crop not used + # target = max_sample_size -> random_crop used for long + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + audios = [s["source"] for s in samples] + audio_sizes = [len(s) for s in audios] + if self.pad_audio: + audio_size = min(max(audio_sizes), self.max_sample_size) + else: + audio_size = min(min(audio_sizes), self.max_sample_size) + feat_dim = audios[0].size(-1) if audios[0].dim() > 1 
else 1 + collated_audios, padding_mask, audio_starts = self.collater_audio( + audios, audio_size, feat_dim, + ) + + targets_by_label = [ + [s["label_list"][i] for s in samples] for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, audio_size, audio_starts + ) + + if self.add_decoder_target: + if self.fine_tuning: + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + else: + if self.tokenizer is not None: + decoder_label = [ + # Set 48 for translate int to char and avoid \n + torch.cat( + ( + torch.tensor( + self.tokenizer.sp.Encode( + "".join( + [chr(j + 48) for j in ( + targets_list[0][i, :lengths_list[0][i]].unique_consecutive() if self.reduce_label_for_dec else targets_list[0][i, :lengths_list[0][i]] + ).tolist()] + ), out_type=int + ) + ), + torch.tensor([self.tgt_dict.eos()]) + ), dim=0 + ).long() + for i in range(targets_list[0].size(0)) + ] + else: + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]].unique_consecutive() if self.reduce_label_for_dec else targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + + if self.mbart_style_lang_id: + decoder_label = [ + torch.cat((decoder_label[i], torch.tensor([self.tgt_lang_idx])), 0).long() + for i in range(targets_list[0].size(0)) + ] + + dec_ntokens = sum(x.size(0) for x in decoder_label) + decoder_target = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos() if not self.mbart_style_lang_id else self.tgt_lang_idx, + left_pad=False, + move_eos_to_beginning=False, + ) + decoder_target_lengths = torch.tensor( + [x.size(0) for x in decoder_label], dtype=torch.long + ) + prev_output_tokens = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos() if not self.mbart_style_lang_id else self.tgt_lang_idx, + left_pad=False, + move_eos_to_beginning=True, + ) + + if self.tgt_lang_idx is not None and not self.mbart_style_lang_id: + assert (prev_output_tokens[:, 0] != self.tgt_dict.eos()).sum() == 0 + prev_output_tokens[:, 0] = self.tgt_lang_idx + + net_input = { + "source": collated_audios, + "padding_mask": padding_mask, + "prev_output_tokens": prev_output_tokens, + } + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + "decoder_target": decoder_target, + "decoder_target_lengths": decoder_target_lengths, + "dec_ntokens": dec_ntokens, + "lang_idx": self.tgt_lang_idx, + } + else: + net_input = {"source": collated_audios, "padding_mask": padding_mask} + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + return batch + + def collater_audio(self, audios, audio_size, feat_dim=1): + collated_audios = audios[0].new_zeros(len(audios), audio_size, feat_dim) + padding_mask = ( + torch.BoolTensor(collated_audios.shape[0:2]).fill_(False) + # if self.pad_audio else None + ) + audio_starts = [0 for _ in audios] + for i, audio in enumerate(audios): + audio = audio.view(-1, feat_dim) + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + 
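+            # shorter than the target size: zero-pad on the right (requires pad_audio=True)
+            # and flag the padded frames in padding_mask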
elif diff < 0: + assert self.pad_audio + collated_audios[i] = torch.cat([audio, audio.new_full((-diff, feat_dim), 0.0)]) + padding_mask[i, diff:] = True + else: + collated_audios[i], audio_starts[i] = self.crop_to_max_size( + audio, audio_size + ) + return collated_audios.squeeze(-1), padding_mask, audio_starts + + def collater_frm_label(self, targets, audio_size, audio_starts, label_rate, pad): + assert label_rate > 0 + s2f = label_rate / self.sample_rate + frm_starts = [int(round(s * s2f)) for s in audio_starts] + frm_size = int(round(audio_size * s2f)) + if not self.pad_audio: + rem_size = [len(t) - s for t, s in zip(targets, frm_starts)] + frm_size = min(frm_size, *rem_size) + targets = [t[s : s + frm_size] for t, s in zip(targets, frm_starts)] + logger.debug(f"audio_starts={audio_starts}") + logger.debug(f"frame_starts={frm_starts}") + logger.debug(f"frame_size={frm_size}") + + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens(targets, pad_idx=pad, left_pad=False) + return targets, lengths, ntokens + + def collater_label(self, targets_by_label, audio_size, audio_starts): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + if label_rate == -1: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + else: + targets, lengths, ntokens = self.collater_frm_label( + targets, audio_size, audio_starts, label_rate, pad + ) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + if self.pad_audio: + return self.wav_sizes[index] + return min(self.wav_sizes[index], self.max_sample_size) + + @property + def sizes(self): + return np.array(self.wav_sizes) + + def ordered_indices(self): + """Return an ordered list of indices. 
Batches will be constructed based + on this order.""" + + if self.shuffle: + if len(self.chunk_names) > 0: + logger.info(f"ordered indices for epoch {self.epoch}") + with data_utils.numpy_seed(self.epoch): + self.chunk_order = np.random.permutation(len(self.chunk_names)) + chunk_count = 0 + tmp_sizes = [] + tmp_indices = [] + indice = [] + for i in self.chunk_order: + chunk_count += 1 + start = self.chunk_indices[i] + end = self.chunk_indices[i+1] if i < len(self.chunk_names) - 1 else len(self) + size = list(self.sizes[start:end]) + tmp_indices.extend(list(np.arange(start, end))) + tmp_sizes.extend(size) + if chunk_count % 10 == 0 or i == self.chunk_order[0]: + order = [np.random.permutation(len(tmp_indices))] + order.append( + np.minimum( + np.array(tmp_sizes), + self.max_sample_size, + ) + ) + sort_idx = np.lexsort(order)[::-1] + indice.append(np.array([tmp_indices[k] for k in sort_idx])) + tmp_indices = [] + tmp_sizes =[] + return indice + else: + order = [np.random.permutation(len(self))] + order.append( + np.minimum( + np.array(self.sizes), + self.max_sample_size, + ) + ) + return np.lexsort(order)[::-1] + else: + return np.arange(len(self)) + + def postprocess(self, wav, cur_sample_rate): + if wav.dim() == 2: + wav = wav.mean(-1) + assert wav.dim() == 1, wav.dim() + + if cur_sample_rate != self.sample_rate: + raise Exception(f"sr {cur_sample_rate} != {self.sample_rate}") + + if self.normalize: + with torch.no_grad(): + wav = F.layer_norm(wav, wav.shape) + return wav diff --git a/SpeechT5/SpeechUT/speechut/data/language_trible_dataset.py b/SpeechT5/SpeechUT/speechut/data/language_trible_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..6494127d6bb5d993d557f9f534f7cca83b0f7fa1 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/data/language_trible_dataset.py @@ -0,0 +1,669 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +import numpy as np +import torch +import os +import itertools + +from fairseq.data import FairseqDataset, data_utils +from fairseq.data import ( + AppendTokenDataset, + ConcatDataset, + PrependTokenDataset, + data_utils, + indexed_dataset, +) + +logger = logging.getLogger(__name__) + +def load_langtriple_dataset( + data_path, + split, + src, + src_dict, + ref, + ref_dict, + tgt, + tgt_dict, + combine, + dataset_impl, + upsample_primary, + left_pad_source, + left_pad_target, + max_source_positions, + max_target_positions, + prepend_bos=False, + load_alignments=False, + truncate_source=False, + append_source_id=False, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + prepend_bos_src=None, + lang_format="[{}]", +): + assert not truncate_source + def split_exists(split, src, ref, tgt, lang, data_path): + filename = os.path.join(data_path, "{}.{}-{}-{}.{}".format(split, src, ref, tgt, lang)) + return indexed_dataset.dataset_exists(filename, impl=dataset_impl) + + src_datasets = [] + ref_datasets = [] + tgt_datasets = [] + + for k in itertools.count(): + split_k = split + (str(k) if k > 0 else "") + + # infer langcode + if split_exists(split_k, src, ref, tgt, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}-{}.".format(split_k, src, ref, tgt)) + elif split_exists(split_k, tgt, ref, src, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}-{}.".format(split_k, tgt, 
ref, src)) + else: + if k > 0: + break + else: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(split, data_path) + ) + + src_dataset = data_utils.load_indexed_dataset( + prefix + src, src_dict, dataset_impl + ) + src_datasets.append(src_dataset) + + ref_dataset = data_utils.load_indexed_dataset( + prefix + ref, ref_dict, dataset_impl + ) + ref_datasets.append(ref_dataset) + + tgt_dataset = data_utils.load_indexed_dataset( + prefix + tgt, tgt_dict, dataset_impl + ) + if tgt_dataset is not None: + tgt_datasets.append(tgt_dataset) + + logger.info( + "{} {} {}-{}-{} {} examples".format( + data_path, split_k, src, ref, tgt, len(src_datasets[-1]) + ) + ) + + if not combine: + break + + assert len(src_datasets) == len(ref_datasets) + assert len(src_datasets) == len(tgt_datasets) or len(tgt_datasets) == 0 + + if len(src_datasets) == 1: + src_dataset = src_datasets[0] + ref_dataset = ref_datasets[0] + tgt_dataset = tgt_datasets[0] if len(tgt_datasets) > 0 else None + else: + sample_ratios = [1] * len(src_datasets) + sample_ratios[0] = upsample_primary + src_dataset = ConcatDataset(src_datasets, sample_ratios) + ref_dataset = ConcatDataset(ref_datasets, sample_ratios) + if len(tgt_datasets) > 0: + tgt_dataset = ConcatDataset(tgt_datasets, sample_ratios) + else: + tgt_dataset = None + + if prepend_bos: + assert hasattr(src_dict, "bos_index") and hasattr(ref_dict, "bos_index") and hasattr(tgt_dict, "bos_index") + src_dataset = PrependTokenDataset(src_dataset, src_dict.bos()) + ref_dataset = PrependTokenDataset(ref_dataset, ref_dict.bos()) + if tgt_dataset is not None: + tgt_dataset = PrependTokenDataset(tgt_dataset, tgt_dict.bos()) + elif prepend_bos_src is not None: + logger.info(f"prepending src bos: {prepend_bos_src}") + src_dataset = PrependTokenDataset(src_dataset, prepend_bos_src) + ref_dataset = PrependTokenDataset(ref_dataset, prepend_bos_src) + + eos = None + if append_source_id: + src_dataset = AppendTokenDataset( + src_dataset, src_dict.index(lang_format.format(src)) + ) + ref_dataset = AppendTokenDataset( + ref_dataset, ref_dict.index(lang_format.format(ref)) + ) + if tgt_dataset is not None: + tgt_dataset = AppendTokenDataset( + tgt_dataset, tgt_dict.index(lang_format.format(tgt)) + ) + eos = tgt_dict.index(lang_format.format(tgt)) + + align_dataset = None + if load_alignments: + align_path = os.path.join(data_path, "{}.align.{}-{}".format(split, src, tgt)) + if indexed_dataset.dataset_exists(align_path, impl=dataset_impl): + align_dataset = data_utils.load_indexed_dataset( + align_path, None, dataset_impl + ) + + tgt_dataset_sizes = tgt_dataset.sizes if tgt_dataset is not None else None + return LanguageTripleDataset( + src_dataset, + src_dataset.sizes, + src_dict, + ref_dataset, + ref_dataset.sizes, + ref_dict, + tgt_dataset, + tgt_dataset_sizes, + tgt_dict, + left_pad_source=left_pad_source, + left_pad_target=left_pad_target, + align_dataset=align_dataset, + eos=eos, + num_buckets=num_buckets, + shuffle=shuffle, + pad_to_multiple=pad_to_multiple, + ) + + +def collate( + samples, + pad_idx, + eos_idx, + left_pad_source=True, + left_pad_target=False, + input_feeding=True, + pad_to_length=None, + pad_to_multiple=1, +): + if len(samples) == 0: + return {} + + def merge(key, left_pad, move_eos_to_beginning=False, pad_to_length=None): + return data_utils.collate_tokens( + [s[key] for s in samples], + pad_idx, + None, + left_pad, + move_eos_to_beginning, + pad_to_length=pad_to_length, + pad_to_multiple=pad_to_multiple, + ) + + def check_alignment(alignment, src_len, tgt_len): 
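+        """Return True only for a non-empty alignment whose source/target indices are
+        in range; out-of-range alignments are skipped with a warning."""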
+ if alignment is None or len(alignment) == 0: + return False + if ( + alignment[:, 0].max().item() >= src_len - 1 + or alignment[:, 1].max().item() >= tgt_len - 1 + ): + logger.warning("alignment size mismatch found, skipping alignment!") + return False + return True + + def compute_alignment_weights(alignments): + """ + Given a tensor of shape [:, 2] containing the source-target indices + corresponding to the alignments, a weight vector containing the + inverse frequency of each target index is computed. + For e.g. if alignments = [[5, 7], [2, 3], [1, 3], [4, 2]], then + a tensor containing [1., 0.5, 0.5, 1] should be returned (since target + index 3 is repeated twice) + """ + align_tgt = alignments[:, 1] + _, align_tgt_i, align_tgt_c = torch.unique( + align_tgt, return_inverse=True, return_counts=True + ) + align_weights = align_tgt_c[align_tgt_i[np.arange(len(align_tgt))]] + return 1.0 / align_weights.float() + + id = torch.LongTensor([s["id"] for s in samples]) + src_tokens = merge( + "source", + left_pad=left_pad_source, + pad_to_length=pad_to_length["source"] if pad_to_length is not None else None, + ) + ref_tokens = merge( + "reference", + left_pad=left_pad_source, + pad_to_length=pad_to_length["source"] if pad_to_length is not None else None, + ) + # sort by descending source length + src_lengths = torch.LongTensor( + [s["source"].ne(pad_idx).long().sum() for s in samples] + ) + ref_lengths = torch.LongTensor( + [s["reference"].ne(pad_idx).long().sum() for s in samples] + ) + src_lengths, sort_order = src_lengths.sort(descending=True) + id = id.index_select(0, sort_order) + src_tokens = src_tokens.index_select(0, sort_order) + ref_lengths = ref_lengths.index_select(0, sort_order) + ref_tokens = ref_tokens.index_select(0, sort_order) + + prev_output_tokens = None + target = None + if samples[0].get("target", None) is not None: + target = merge( + "target", + left_pad=left_pad_target, + pad_to_length=pad_to_length["target"] + if pad_to_length is not None + else None, + ) + target = target.index_select(0, sort_order) + tgt_lengths = torch.LongTensor( + [s["target"].ne(pad_idx).long().sum() for s in samples] + ).index_select(0, sort_order) + ntokens = tgt_lengths.sum().item() + + if samples[0].get("prev_output_tokens", None) is not None: + prev_output_tokens = merge("prev_output_tokens", left_pad=left_pad_target) + elif input_feeding: + # we create a shifted version of targets for feeding the + # previous output token(s) into the next decoder step + prev_output_tokens = merge( + "target", + left_pad=left_pad_target, + move_eos_to_beginning=True, + pad_to_length=pad_to_length["target"] + if pad_to_length is not None + else None, + ) + else: + ntokens = src_lengths.sum().item() + + batch = { + "id": id, + "nsentences": len(samples), + "ntokens": ntokens, + "net_input": { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + }, + "target": target, + "ref_tokens": ref_tokens, + "ref_lengths": ref_lengths, + } + if prev_output_tokens is not None: + batch["net_input"]["prev_output_tokens"] = prev_output_tokens.index_select( + 0, sort_order + ) + + if samples[0].get("alignment", None) is not None: + bsz, tgt_sz = batch["target"].shape + src_sz = batch["net_input"]["src_tokens"].shape[1] + + offsets = torch.zeros((len(sort_order), 2), dtype=torch.long) + offsets[:, 1] += torch.arange(len(sort_order), dtype=torch.long) * tgt_sz + if left_pad_source: + offsets[:, 0] += src_sz - src_lengths + if left_pad_target: + offsets[:, 1] += tgt_sz - tgt_lengths + + alignments = [ + alignment + 
offset + for align_idx, offset, src_len, tgt_len in zip( + sort_order, offsets, src_lengths, tgt_lengths + ) + for alignment in [samples[align_idx]["alignment"].view(-1, 2)] + if check_alignment(alignment, src_len, tgt_len) + ] + + if len(alignments) > 0: + alignments = torch.cat(alignments, dim=0) + align_weights = compute_alignment_weights(alignments) + + batch["alignments"] = alignments + batch["align_weights"] = align_weights + + if samples[0].get("constraints", None) is not None: + # Collate the packed constraints across the samples, padding to + # the length of the longest sample. + lens = [sample.get("constraints").size(0) for sample in samples] + max_len = max(lens) + constraints = torch.zeros((len(samples), max(lens))).long() + for i, sample in enumerate(samples): + constraints[i, 0 : lens[i]] = samples[i].get("constraints") + batch["constraints"] = constraints.index_select(0, sort_order) + + return batch + + +class LanguageTripleDataset(FairseqDataset): + """ + A pair of torch.utils.data.Datasets. + + Args: + src (torch.utils.data.Dataset): source dataset to wrap + src_sizes (List[int]): source sentence lengths + src_dict (~fairseq.data.Dictionary): source vocabulary + tgt (torch.utils.data.Dataset, optional): target dataset to wrap + tgt_sizes (List[int], optional): target sentence lengths + tgt_dict (~fairseq.data.Dictionary, optional): target vocabulary + left_pad_source (bool, optional): pad source tensors on the left side + (default: True). + left_pad_target (bool, optional): pad target tensors on the left side + (default: False). + shuffle (bool, optional): shuffle dataset elements before batching + (default: True). + input_feeding (bool, optional): create a shifted version of the targets + to be passed into the model for teacher forcing (default: True). + remove_eos_from_source (bool, optional): if set, removes eos from end + of source if it's present (default: False). + append_eos_to_target (bool, optional): if set, appends eos to end of + target if it's absent (default: False). + align_dataset (torch.utils.data.Dataset, optional): dataset + containing alignments. + constraints (Tensor, optional): 2d tensor with a concatenated, zero- + delimited list of constraints for each sentence. + append_bos (bool, optional): if set, appends bos to the beginning of + source/target sentence. + num_buckets (int, optional): if set to a value greater than 0, then + batches will be bucketed into the given number of batch shapes. + src_lang_id (int, optional): source language ID, if set, the collated batch + will contain a field 'src_lang_id' in 'net_input' which indicates the + source language of the samples. + tgt_lang_id (int, optional): target language ID, if set, the collated batch + will contain a field 'tgt_lang_id' which indicates the target language + of the samples. 
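+        ref (torch.utils.data.Dataset): reference dataset to wrap, e.g. phoneme or
+            hidden-unit labels paired with the source; collated into `ref_tokens`
+            and `ref_lengths` of the batch
+        ref_sizes (List[int]): reference sentence lengths
+        ref_dict (~fairseq.data.Dictionary): reference vocabulary
+
+    Illustrative usage (a sketch only; assumes `src_ds`, `ref_ds`, `tgt_ds` are indexed
+    datasets and `dic` is a shared Dictionary - these names are placeholders, not part
+    of this module)::
+
+        dataset = LanguageTripleDataset(
+            src_ds, src_ds.sizes, dic,
+            ref_ds, ref_ds.sizes, dic,
+            tgt_ds, tgt_ds.sizes, dic,
+        )
+        batch = dataset.collater([dataset[i] for i in range(4)])
+        # batch["net_input"]["src_tokens"], batch["ref_tokens"], batch["target"], ...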
+ """ + + def __init__( + self, + src, + src_sizes, + src_dict, + ref, + ref_sizes, + ref_dict, + tgt=None, + tgt_sizes=None, + tgt_dict=None, + left_pad_source=True, + left_pad_target=False, + shuffle=True, + input_feeding=True, + remove_eos_from_source=False, + append_eos_to_target=False, + align_dataset=None, + constraints=None, + append_bos=False, + eos=None, + num_buckets=0, + src_lang_id=None, + tgt_lang_id=None, + pad_to_multiple=1, + ): + if tgt_dict is not None: + assert src_dict.pad() == tgt_dict.pad() + assert src_dict.eos() == tgt_dict.eos() + assert src_dict.unk() == tgt_dict.unk() + if tgt is not None: + assert len(src) == len( + tgt + ), "Source and target must contain the same number of examples" + assert len(src) == len( + ref + ), "Source and reference must contain the same number of examples" + self.src = src + self.ref = ref + self.tgt = tgt + self.src_sizes = np.array(src_sizes) + self.ref_sizes = np.array(ref_sizes) + self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None + self.sizes = ( + np.vstack((self.src_sizes, self.tgt_sizes)).T + if self.tgt_sizes is not None + else self.src_sizes + ) + self.src_dict = src_dict + self.ref_dict = ref_dict + self.tgt_dict = tgt_dict + self.left_pad_source = left_pad_source + self.left_pad_target = left_pad_target + self.shuffle = shuffle + self.input_feeding = input_feeding + self.remove_eos_from_source = remove_eos_from_source + self.append_eos_to_target = append_eos_to_target + self.align_dataset = align_dataset + if self.align_dataset is not None: + assert ( + self.tgt_sizes is not None + ), "Both source and target needed when alignments are provided" + self.constraints = constraints + self.append_bos = append_bos + self.eos = eos if eos is not None else src_dict.eos() + self.src_lang_id = src_lang_id + self.tgt_lang_id = tgt_lang_id + if num_buckets > 0: + from fairseq.data import BucketPadLengthDataset + + self.src = BucketPadLengthDataset( + self.src, + sizes=self.src_sizes, + num_buckets=num_buckets, + pad_idx=self.src_dict.pad(), + left_pad=self.left_pad_source, + ) + self.src_sizes = self.src.sizes + logger.info("bucketing source lengths: {}".format(list(self.src.buckets))) + self.ref = BucketPadLengthDataset( + self.ref, + sizes=self.ref_sizes, + num_buckets=num_buckets, + pad_idx=self.ref_dict.pad(), + left_pad=self.left_pad_source, + ) + self.ref_sizes = self.ref.sizes + logger.info("bucketing reference lengths: {}".format(list(self.src.buckets))) + if self.tgt is not None: + self.tgt = BucketPadLengthDataset( + self.tgt, + sizes=self.tgt_sizes, + num_buckets=num_buckets, + pad_idx=self.tgt_dict.pad(), + left_pad=self.left_pad_target, + ) + self.tgt_sizes = self.tgt.sizes + logger.info( + "bucketing target lengths: {}".format(list(self.tgt.buckets)) + ) + + # determine bucket sizes using self.num_tokens, which will return + # the padded lengths (thanks to BucketPadLengthDataset) + num_tokens = np.vectorize(self.num_tokens, otypes=[np.compat.long]) + self.bucketed_num_tokens = num_tokens(np.arange(len(self.src))) + self.buckets = [ + (None, num_tokens) for num_tokens in np.unique(self.bucketed_num_tokens) + ] + else: + self.buckets = None + self.pad_to_multiple = pad_to_multiple + + def get_batch_shapes(self): + return self.buckets + + def __getitem__(self, index): + tgt_item = self.tgt[index] if self.tgt is not None else None + src_item = self.src[index] + ref_item = self.ref[index] + # Append EOS to end of tgt sentence if it does not have an EOS and remove + # EOS from end of src sentence if it 
exists. This is useful when we use + # use existing datasets for opposite directions i.e., when we want to + # use tgt_dataset as src_dataset and vice versa + if self.append_eos_to_target: + eos = self.tgt_dict.eos() if self.tgt_dict else self.src_dict.eos() + if self.tgt and self.tgt[index][-1] != eos: + tgt_item = torch.cat([self.tgt[index], torch.LongTensor([eos])]) + + if self.append_bos: + bos = self.tgt_dict.bos() if self.tgt_dict else self.src_dict.bos() + if self.tgt and self.tgt[index][0] != bos: + tgt_item = torch.cat([torch.LongTensor([bos]), self.tgt[index]]) + + bos = self.src_dict.bos() + if self.src[index][0] != bos: + src_item = torch.cat([torch.LongTensor([bos]), self.src[index]]) + if self.ref[index][0] != bos: + ref_item = torch.cat([torch.LongTensor([bos]), self.ref[index]]) + + if self.remove_eos_from_source: + eos = self.src_dict.eos() + if self.src[index][-1] == eos: + src_item = self.src[index][:-1] + if self.ref[index][-1] == eos: + ref_item = self.ref[index][:-1] + + example = { + "id": index, + "source": src_item, + "reference": ref_item, + "target": tgt_item, + } + if self.align_dataset is not None: + example["alignment"] = self.align_dataset[index] + if self.constraints is not None: + example["constraints"] = self.constraints[index] + return example + + def __len__(self): + return len(self.src) + + def collater(self, samples, pad_to_length=None): + """Merge a list of samples to form a mini-batch. + + Args: + samples (List[dict]): samples to collate + pad_to_length (dict, optional): a dictionary of + {'source': source_pad_to_length, 'target': target_pad_to_length} + to indicate the max length to pad to in source and target respectively. + + Returns: + dict: a mini-batch with the following keys: + + - `id` (LongTensor): example IDs in the original input order + - `ntokens` (int): total number of tokens in the batch + - `net_input` (dict): the input to the Model, containing keys: + + - `src_tokens` (LongTensor): a padded 2D Tensor of tokens in + the source sentence of shape `(bsz, src_len)`. Padding will + appear on the left if *left_pad_source* is ``True``. + - `src_lengths` (LongTensor): 1D Tensor of the unpadded + lengths of each source sentence of shape `(bsz)` + - `prev_output_tokens` (LongTensor): a padded 2D Tensor of + tokens in the target sentence, shifted right by one + position for teacher forcing, of shape `(bsz, tgt_len)`. + This key will not be present if *input_feeding* is + ``False``. Padding will appear on the left if + *left_pad_target* is ``True``. + - `src_lang_id` (LongTensor): a long Tensor which contains source + language IDs of each sample in the batch + + - `target` (LongTensor): a padded 2D Tensor of tokens in the + target sentence of shape `(bsz, tgt_len)`. Padding will appear + on the left if *left_pad_target* is ``True``. 
+ - `tgt_lang_id` (LongTensor): a long Tensor which contains target language + IDs of each sample in the batch + """ + res = collate( + samples, + pad_idx=self.src_dict.pad(), + eos_idx=self.eos, + left_pad_source=self.left_pad_source, + left_pad_target=self.left_pad_target, + input_feeding=self.input_feeding, + pad_to_length=pad_to_length, + pad_to_multiple=self.pad_to_multiple, + ) + if self.src_lang_id is not None or self.tgt_lang_id is not None: + src_tokens = res["net_input"]["src_tokens"] + bsz = src_tokens.size(0) + if self.src_lang_id is not None: + res["net_input"]["src_lang_id"] = ( + torch.LongTensor([[self.src_lang_id]]).expand(bsz, 1).to(src_tokens) + ) + if self.tgt_lang_id is not None: + res["tgt_lang_id"] = ( + torch.LongTensor([[self.tgt_lang_id]]).expand(bsz, 1).to(src_tokens) + ) + return res + + def num_tokens(self, index): + """Return the number of tokens in a sample. This value is used to + enforce ``--max-tokens`` during batching.""" + return max( + self.src_sizes[index], + self.tgt_sizes[index] if self.tgt_sizes is not None else 0, + ) + + def num_tokens_vec(self, indices): + """Return the number of tokens for a set of positions defined by indices. + This value is used to enforce ``--max-tokens`` during batching.""" + sizes = self.src_sizes[indices] + if self.tgt_sizes is not None: + sizes = np.maximum(sizes, self.tgt_sizes[indices]) + return sizes + + def size(self, index): + """Return an example's size as a float or tuple. This value is used when + filtering a dataset with ``--max-positions``.""" + return ( + self.src_sizes[index], + self.tgt_sizes[index] if self.tgt_sizes is not None else 0, + ) + + def ordered_indices(self): + """Return an ordered list of indices. Batches will be constructed based + on this order.""" + if self.shuffle: + indices = np.random.permutation(len(self)).astype(np.int64) + else: + indices = np.arange(len(self), dtype=np.int64) + if self.buckets is None: + # sort by target length, then source length + if self.tgt_sizes is not None: + indices = indices[np.argsort(self.tgt_sizes[indices], kind="mergesort")] + return indices[np.argsort(self.src_sizes[indices], kind="mergesort")] + else: + # sort by bucketed_num_tokens, which is: + # max(padded_src_len, padded_tgt_len) + return indices[ + np.argsort(self.bucketed_num_tokens[indices], kind="mergesort") + ] + + @property + def supports_prefetch(self): + return getattr(self.src, "supports_prefetch", False) and ( + getattr(self.tgt, "supports_prefetch", False) or self.tgt is None + ) + + def prefetch(self, indices): + self.src.prefetch(indices) + if self.tgt is not None: + self.tgt.prefetch(indices) + if self.align_dataset is not None: + self.align_dataset.prefetch(indices) + + def filter_indices_by_size(self, indices, max_sizes): + """Filter a list of sample indices. Remove those that are longer + than specified in max_sizes. 
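+        Only the source/target lengths are checked; the reference stream is not
+        length-filtered here.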
+ + Args: + indices (np.array): original array of sample indices + max_sizes (int or list[int] or tuple[int]): max sample size, + can be defined separately for src and tgt (then list or tuple) + + Returns: + np.array: filtered sample array + list: list of removed indices + """ + return data_utils.filter_paired_dataset_indices_by_size( + self.src_sizes, + self.tgt_sizes, + indices, + max_sizes, + ) diff --git a/SpeechT5/SpeechUT/speechut/data/load_langpair_dataset.py b/SpeechT5/SpeechUT/speechut/data/load_langpair_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd204598e67d41a5688e16b0835f96fd40cf384 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/data/load_langpair_dataset.py @@ -0,0 +1,172 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/272c4c5197250997148fb12c0db6306035f166a4/fairseq/tasks/translation.py + 1. Add custom lang_format in function load_langpair_dataset + 2. If truncate_source (default no), use RandomCropDataset instead of TruncateDataset +""" + +import itertools +import logging +import os + +from fairseq.data import ( + AppendTokenDataset, + LanguagePairDataset, + PrependTokenDataset, + StripTokenDataset, + TruncateDataset, + RandomCropDataset, + data_utils, + indexed_dataset, +) + +from speechut.data.concat_dataset import ConcatDataset + + +EVAL_BLEU_ORDER = 4 + + +logger = logging.getLogger(__name__) + + +def load_langpair_dataset( + data_path, + split, + src, + src_dict, + tgt, + tgt_dict, + combine, + dataset_impl, + upsample_primary, + left_pad_source, + left_pad_target, + max_source_positions, + max_target_positions, + prepend_bos=False, + load_alignments=False, + truncate_source=False, + append_source_id=False, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + prepend_bos_src=None, + lang_format="[{}]", + input_feeding=True, +): + def split_exists(split, src, tgt, lang, data_path): + filename = os.path.join(data_path, "{}.{}-{}.{}".format(split, src, tgt, lang)) + return indexed_dataset.dataset_exists(filename, impl=dataset_impl) + + src_datasets = [] + tgt_datasets = [] + + for k in itertools.count(): + split_k = split + (str(k) if k > 0 else "") + + # infer langcode + if split_exists(split_k, src, tgt, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, src, tgt)) + elif split_exists(split_k, tgt, src, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, tgt, src)) + else: + if k > 0: + break + else: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(split, data_path) + ) + + src_dataset = data_utils.load_indexed_dataset( + prefix + src, src_dict, dataset_impl + ) + if truncate_source: + src_dataset = AppendTokenDataset( + RandomCropDataset( + StripTokenDataset(src_dataset, src_dict.eos()), + max_source_positions - 1, + ), + src_dict.eos(), + ) + src_datasets.append(src_dataset) + + tgt_dataset = data_utils.load_indexed_dataset( + prefix + tgt, tgt_dict, dataset_impl + ) + if tgt_dataset is not None: + tgt_datasets.append(tgt_dataset) + + logger.info( + "{} {} {}-{} {} examples".format( + data_path, split_k, src, tgt, len(src_datasets[-1]) + ) + ) + + if not combine: + break + + assert len(src_datasets) == len(tgt_datasets) or len(tgt_datasets) == 0 
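+    # a plain split name maps to a single shard; extra shards (split1, split2, ...) are
+    # concatenated below, with the primary shard optionally upsampled via upsample_primary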
+ + if len(src_datasets) == 1: + src_dataset = src_datasets[0] + tgt_dataset = tgt_datasets[0] if len(tgt_datasets) > 0 else None + else: + sample_ratios = [1] * len(src_datasets) + sample_ratios[0] = upsample_primary + src_dataset = ConcatDataset(src_datasets, sample_ratios) + if len(tgt_datasets) > 0: + tgt_dataset = ConcatDataset(tgt_datasets, sample_ratios) + else: + tgt_dataset = None + + if prepend_bos: + assert hasattr(src_dict, "bos_index") and hasattr(tgt_dict, "bos_index") + src_dataset = PrependTokenDataset(src_dataset, src_dict.bos()) + if tgt_dataset is not None: + tgt_dataset = PrependTokenDataset(tgt_dataset, tgt_dict.bos()) + elif prepend_bos_src is not None: + logger.info(f"prepending src bos: {prepend_bos_src}") + src_dataset = PrependTokenDataset(src_dataset, prepend_bos_src) + + eos = None + if append_source_id: + src_dataset = AppendTokenDataset( + src_dataset, src_dict.index(lang_format.format(src)) + ) + if tgt_dataset is not None: + tgt_dataset = AppendTokenDataset( + tgt_dataset, tgt_dict.index(lang_format.format(tgt)) + ) + eos = tgt_dict.index(lang_format.format(tgt)) + + align_dataset = None + if load_alignments: + align_path = os.path.join(data_path, "{}.align.{}-{}".format(split, src, tgt)) + if indexed_dataset.dataset_exists(align_path, impl=dataset_impl): + align_dataset = data_utils.load_indexed_dataset( + align_path, None, dataset_impl + ) + + tgt_dataset_sizes = tgt_dataset.sizes if tgt_dataset is not None else None + return LanguagePairDataset( + src_dataset, + src_dataset.sizes, + src_dict, + tgt_dataset, + tgt_dataset_sizes, + tgt_dict, + left_pad_source=left_pad_source, + left_pad_target=left_pad_target, + align_dataset=align_dataset, + eos=eos, + num_buckets=num_buckets, + shuffle=shuffle, + pad_to_multiple=pad_to_multiple, + input_feeding=input_feeding, + ) diff --git a/SpeechT5/SpeechUT/speechut/data/multimodal_corpus_dataset.py b/SpeechT5/SpeechUT/speechut/data/multimodal_corpus_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..19a6f8962757dec9b32430a98cd6e850d1f30d19 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/data/multimodal_corpus_dataset.py @@ -0,0 +1,368 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +from os import replace +import time +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import numpy as np +from fairseq.data import data_utils + +from fairseq.data import FairseqDataset + +logger = logging.getLogger(__name__) + + +class MultiCorpusDataset(FairseqDataset): + """ + see fairseq/fairseq/data/multi_corpus_dataset.__doc__ + + Args: + datasets: a OrderedDict of FairseqDataset instances. 
+ distribution: a List containing the probability of getting an utterance from + corresponding dataset + seed: random seed for sampling the datsets + sort_indices: if true, will sort the ordered indices by size + batch_sample: if true, will ensure each batch is from a single dataset + """ + + def __init__( + self, + datasets: Dict[str, FairseqDataset], + max_positions: Dict, + distribution: List[float], + max_tokens_ratio: List[float], + seed: int = 1234, + sort_indices: bool = False, + check_length: bool = False, + ): + super().__init__() + assert isinstance(datasets, OrderedDict) + assert len(datasets) == len(distribution) + # assert sum(distribution) == 1 + self.datasets = datasets + self.distribution = distribution + self.max_tokens_ratio = max_tokens_ratio + self.seed = seed + self.sort_indices = sort_indices + self.max_positions = max_positions + self.check_length = check_length + + # Avoid repeated conversions to list later + self.dataset_list = list(datasets.values()) + self.total_num_instances = 0 + + # first_dataset = self.dataset_list[0] + + self.num_instances_per_dataset = [] + self.dataset_offsets = [] + for i, dataset in enumerate(self.dataset_list): + assert isinstance(dataset, FairseqDataset) + # assert type(dataset) is type(first_dataset) + self.num_instances_per_dataset.append( + 0 if self.distribution[i] == 0 else len(dataset) + ) + self.dataset_offsets.append(self.total_num_instances) + self.total_num_instances += self.num_instances_per_dataset[i] + + def ordered_indices(self): + start = time.time() + with data_utils.numpy_seed(self.seed, self.epoch): + logger.info(f"sampling new dataset with seed {self.seed} epoch {self.epoch}") + sampled_indices = {} + + # For each dataset i, sample self.distribution[i] * self.total_num_instances + for i, key in enumerate(self.datasets): + tp = time.time() + if self.distribution[i] == 0: + # skip dataset if sampling probability is 0 + continue + + if i < len(self.datasets) - 1: + num_instances = int(self.distribution[i] * self.total_num_instances) + high = self.dataset_offsets[i + 1] + else: + num_instances = int(self.distribution[i] * self.total_num_instances) + high = self.total_num_instances + + logger.info(f"sampling {num_instances} from {key} dataset") + + # First, add k copies of the dataset where k = num_instances // len(dataset). + # This ensures an equal distribution of the data points as much as possible. 
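+                # (e.g. num_instances=250 over a 100-item dataset gives num_copies=2 full passes)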
+ # For the remaining entries randomly sample them + dataset_size = len(self.datasets[key]) + num_copies = num_instances // dataset_size + dataset_indices = np.random.permutation(high - self.dataset_offsets[i])[: num_instances - num_copies * dataset_size] + if num_copies > 0: + dataset_indices = np.concatenate( + ( + np.repeat( + np.arange(high - self.dataset_offsets[i]), num_copies + ), + dataset_indices, + ) + ) + # filter by size, we should ignore it by setting check_length=False + # , as it is very time-consuming on large dadaset + if self.max_positions[key] is not None and self.check_length: + dataset_indices, ignored = self.datasets[key].filter_indices_by_size( + dataset_indices, + self.max_positions[key], + ) + if len(ignored) > 0: + logger.warning( + ( + "{:,} samples have invalid sizes and will be skipped, " + "max_positions={}, first few sample ids={}" + ).format(len(ignored), self.max_positions[key], ignored[:10]) + ) + + if self.sort_indices: + logger.info(" - sampled indices took {}s".format(time.time() - tp)) + tp = time.time() + dataset_indices = np.sort(dataset_indices) + ordered_indices = self.datasets[key].ordered_indices() + if isinstance(ordered_indices[0], np.ndarray): # chunked audio data + dataset_indices = [order_idx + self.dataset_offsets[i] for order_idx in ordered_indices] + assert self.dataset_offsets[i] == 0 + # TODO for chunked audio data, now assume len(dataset_indices) == len(dataset). Don't filter any data. + else: + dataset_indices = ordered_indices[dataset_indices] + self.dataset_offsets[i] + logger.info(" - ordered_indices took {}s".format(time.time() - tp)) + else: + np.random.shuffle(dataset_indices) + + sampled_indices[key] = dataset_indices + + logger.info( + "multi_corpus_dataset ordered_indices took {}s".format( + time.time() - start + ) + ) + return sampled_indices + + def _map_index(self, index: int): + """ + If dataset A has length N and dataset B has length M + then index 1 maps to index 1 of dataset A, and index N + 1 + maps to index 1 of B. + """ + counter = 0 + for num_instances, key in zip(self.num_instances_per_dataset, self.datasets): + if index < counter + num_instances: + return index - counter, key + counter += num_instances + raise ValueError( + "Invalid index: {}, max: {}".format(index, self.total_num_instances) + ) + + def __len__(self): + """ + Length of this dataset is the sum of individual datasets + """ + return self.total_num_instances + + def __getitem__(self, index): + new_index, key = self._map_index(index) + try: + item = self.datasets[key][new_index] + item["full_id"] = index + return item + except Exception as e: + e.args = (f"Error from {key} dataset", *e.args) + raise + + def collater(self, samples): + """ + If we are doing batch sampling, then pick the right collater to use. + + Otherwise we assume all collaters are the same. 
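+
+        Samples are routed back to their originating dataset via the `full_id` written in
+        __getitem__, and the result is a dict mapping dataset name -> collated sub-batch,
+        e.g. {"speech": ..., "text_paired": ...}.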
+ """ + if len(samples) == 0: + return None + + samples_dict = {key: [] for key in self.datasets} + for s in samples: + _, key = self._map_index(s["full_id"]) + samples_dict[key].append(s) + + batch = {} + for key in samples_dict: + if len(samples_dict[key]) == 0: + continue + batch[key] = self.datasets[key].collater(samples_dict[key]) + + return batch + + + def num_tokens(self, index: int): + index, key = self._map_index(index) + return self.datasets[key].num_tokens(index) + + def size(self, index: int): + index, key = self._map_index(index) + return self.datasets[key].size(index) + + @property + def can_reuse_epoch_itr_across_epochs(self): + return False + + def set_epoch(self, epoch, **unused): + super().set_epoch(epoch) + logger.info(f"setting epoch of multi_corpus_dataset to {epoch}") + for ds in self.dataset_list: + if hasattr(ds, "set_epoch"): + ds.set_epoch(epoch) + self.epoch = epoch + + @property + def supports_prefetch(self): + return False + + @property + def supports_fetch_outside_dataloader(self): + return all( + self.datasets[key].supports_fetch_outside_dataloader + for key in self.datasets + ) + + + def batch_by_size( + self, + indices, + max_tokens=None, + max_sentences=None, + required_batch_size_multiple=1, + ): + dataset_indices = indices + batches_dict = {} + for n, key in enumerate(dataset_indices): + max_tokens_ratio = self.max_tokens_ratio[n] + if isinstance(dataset_indices[key][0], np.ndarray): # chunked audio data + cur_batches = self.datasets[key].batch_by_size( + dataset_indices[key], + round(max_tokens * max_tokens_ratio), + max_sentences, + required_batch_size_multiple, + ) + logger.info(f"Created {sum([len(b) for b in cur_batches])} [{len(cur_batches)}] batches for dataset {key}") + else: + cur_batches = super().batch_by_size( + np.array(dataset_indices[key], dtype=np.int64), + round(max_tokens * max_tokens_ratio), + max_sentences, + required_batch_size_multiple, + ) + logger.info(f"Created {len(cur_batches)} batches for dataset {key}") + batches_dict[key] = cur_batches + + return batches_dict + + + def get_batch_sampler( + self, + indices, + num_shards, + seed, + max_tokens=None, + max_sentences=None, + required_batch_size_multiple=1, + split_modality_batch=False, + ): + + def batch_sampler(dataset, epoch): + start = time.time() + batches_dict = dataset.batch_by_size( + indices, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + ) + logger.info(f"multi_corpus_dataset, batch_by_size took {time.time() - start}s") + start = time.time() + new_batches = [] + + ### shuffle inner group size, split into speech/text batches + shuffled_batches_list = [] + speech_batches = [] + ### we should specify the speech_batches because: we need concatenate different speech datasets + # (e.g. ltr or km) instead of loading them parellelly. 
+            for name, batches in batches_dict.items():
+                if name.startswith("speech"):
+                    if isinstance(batches[0], list):  # chunked audio data
+                        batches = self.datasets[name].shuffle_batches(list(batches), seed + epoch)
+                        shuffled_batches_list.append(batches)
+                    else:
+                        batches = inner_bucket_shuffle(batches, seed+epoch, num_shards*10)
+                        batches = batches[: (len(batches) // num_shards) * num_shards]
+                        if len(batches) == 0:
+                            logger.warning(f"Sampled 0 batches for {name}; make sure that no {name} data is actually needed.")
+                        else:
+                            speech_batches += batches
+                else:
+                    batches = inner_bucket_shuffle(batches, seed+epoch, num_shards*10)
+                    batches = batches[: (len(batches) // num_shards) * num_shards]
+                    if len(batches) == 0:
+                        logger.warning(f"Sampled 0 batches for {name}; make sure that no {name} data is actually needed.")
+                    else:
+                        batches = shuffle_buckets(batches, seed=seed+epoch, inner_shuf=False)
+                        shuffled_batches_list.append(batches)
+            if len(speech_batches) > 0:
+                speech_batches = shuffle_buckets(speech_batches, seed=seed+epoch, inner_shuf=False)
+                shuffled_batches_list.append(speech_batches)
+
+            ### create the final new_batches
+            num_batch = min(len(batches) for batches in shuffled_batches_list)
+            if split_modality_batch:
+                for i in range(0, num_batch, num_shards):
+                    for batches in shuffled_batches_list:
+                        new_batches += batches[i: i + num_shards]
+            else:
+                for i in range(num_batch):
+                    new_batches.append(np.concatenate([batches[i] for batches in shuffled_batches_list]))
+
+            logger.info(f"multi_corpus_dataset sampled {len(new_batches)} batches, took {time.time() - start}s")
+            return new_batches
+
+        def inner_bucket_shuffle(batches, seed, bucket_size=10, thr=0):
+            """Assumes batches are sorted from long to short.
+            Shuffle samples within a bucket (e.g. 10 batches).
+            batches: a list of numpy arrays"""
+            num_batch = len(batches)
+            new_batches = []
+            num_buckets = len(batches) // bucket_size
+            i = 0
+            while i < num_batch:
+                if (i < bucket_size * thr or
+                    i >= bucket_size * (num_buckets - thr)
+                ):
+                    new_batches.append(batches[i])
+                    i += 1
+                else:
+                    group = np.concatenate(batches[i: i+bucket_size])
+                    with data_utils.numpy_seed(seed):
+                        np.random.shuffle(group)
+                    new_batches += np.array_split(group, bucket_size)
+                    i += bucket_size
+            assert all([len(batch) > 0 for batch in new_batches])
+            return new_batches
+
+        def shuffle_buckets(batches, seed, inner_shuf=True):
+            if inner_shuf:
+                batches = inner_bucket_shuffle(batches, seed, num_shards*10)
+            batches = [batches[i: i + num_shards] for i in range(0, len(batches)-num_shards+1, num_shards)]
+            assert len(batches[-1]) == num_shards
+            new_batches = []
+            with data_utils.numpy_seed(seed):
+                np.random.shuffle(batches)
+            for group in batches:
+                new_batches += group
+            return new_batches
+
+        return batch_sampler
diff --git a/SpeechT5/SpeechUT/speechut/models/__init__.py b/SpeechT5/SpeechUT/speechut/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/SpeechT5/SpeechUT/speechut/models/speechut.py b/SpeechT5/SpeechUT/speechut/models/speechut.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb668286c1c1c420d0c7d7b9e74a3bca17c6c871
--- /dev/null
+++ b/SpeechT5/SpeechUT/speechut/models/speechut.py
@@ -0,0 +1,785 @@
+# ----------------------------------------------------------------------------
+# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730)
+# Github source: 
https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from fairseq import utils, checkpoint_utils +from fairseq.data.data_utils import compute_mask_indices +from fairseq.data.dictionary import Dictionary +from fairseq.dataclass import ChoiceEnum +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.transformer import Embedding +from fairseq.file_io import PathManager +from torch import Tensor +from fairseq.models.wav2vec.wav2vec2 import ConvFeatureExtractionModel +from fairseq.modules import GradMultiply, LayerNorm +from fairseq.tasks.hubert_pretraining import ( + HubertPretrainingConfig, + HubertPretrainingTask, +) +from fairseq.models.hubert import HubertConfig +from fairseq.models.transformer import TransformerConfig +from speechut.modules import TransformerEncoder +from speechut.modules import TransformerEncoderBase +from speechut.modules import TransformerDecoderBaseScriptable + +logger = logging.getLogger(__name__) + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) + + +@dataclass + +class SpeechutConfig(HubertConfig): + use_rel_pos_enc: bool = field( + default=False, + metadata={"help": "whether to use relative positional encoding"}, + ) + scaling_for_att: float = field( + default=1.0, + metadata={"help": "scaling for attention weights to prevent overflow issue (for large model)"}, + ) + + # unit encoder-decoder + text_transformer: TransformerConfig = TransformerConfig() + reset_decoder_embedding_config: bool = field( + default=False, + metadata={"help": "reset the no_scale_embedding/layernorm_embedding to default for the decoder"}, + ) + add_unit_encoder: bool = field( + default=False, + metadata={"help": "add unit encoder"}, + ) + add_decoder: bool = field( + default=True, + metadata={"help": "add decoder"}, + ) + add_text_ctc: bool = field( + default=False, + metadata={"help": "add_text_ctc head"}, + ) + text_ctc_conv_kernel: int = field( + default=2, + metadata={"help": "text_ctc_conv kernel size"}, + ) + mask_u2t: bool = field( + default=True, + metadata={"help": "mask the unit input in unit-to-text task"}, + ) + + # embedding mixing + mix_with_unit: bool = field( + default=True, + metadata={"help": "mix with the unit embeddings"}, + ) + use_pred_unit: bool = field( + default=False, + metadata={"help": "use the embeddings of predicted units"}, + ) + l2_embedding: bool = field( + default=False, + metadata={"help": "compute l2 loss between unit embedding and unit hidden state"}, + ) + + # Finetune related + encoder_dict_size: int = field( + default=-1, + metadata={"help": "text encoder dictionary dimension"}, + ) + + decoder_dict_size: int = field( + default=-1, + metadata={"help": "decoder dictionary dimension"}, + ) + + +@register_model("speechut", dataclass=SpeechutConfig) +class SpeechutModel(BaseFairseqModel): + def __init__( + self, + cfg: SpeechutConfig, + task_cfg: HubertPretrainingConfig, + dictionaries: List[Dictionary], + unit_dictionary: Dictionary = 
None, + text_tgt_dictionary: Dictionary = None, + ) -> None: + super().__init__() + logger.info(f"SpeechutModel Config: {cfg}") + + feature_enc_layers = eval(cfg.conv_feature_layers) # noqa + self.embed = feature_enc_layers[-1][0] + + self.feature_extractor = ConvFeatureExtractionModel( + conv_layers=feature_enc_layers, + dropout=0.0, + mode=cfg.extractor_mode, + conv_bias=cfg.conv_bias, + ) + feature_ds_rate = np.prod([s for _, _, s in feature_enc_layers]) + self.feat2tar_ratio = cfg.label_rate * feature_ds_rate / task_cfg.sample_rate + + self.post_extract_proj = ( + nn.Linear(self.embed, cfg.encoder_embed_dim) + if self.embed != cfg.encoder_embed_dim + else None + ) + + self.mask_prob = cfg.mask_prob + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length = cfg.mask_length + self.no_mask_overlap = cfg.no_mask_overlap + self.mask_min_space = cfg.mask_min_space + + self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + self.logit_temp = cfg.logit_temp + self.skip_masked = cfg.skip_masked + self.skip_nomask = cfg.skip_nomask + + final_dim = cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim + + self.mask_emb = nn.Parameter( + torch.FloatTensor(cfg.encoder_embed_dim).uniform_() + ) + + self.encoder = TransformerEncoder(cfg) + self.layer_norm = LayerNorm(self.embed) + + self.target_glu = None + if cfg.target_glu: + self.target_glu = nn.Sequential( + nn.Linear(final_dim, final_dim * 2), nn.GLU() + ) + + self.final_dim = final_dim + assert len(dictionaries) <= 2, f"Only support <=2 kinds of targets, get {len(dictionaries)} dictionaries" + if len(dictionaries) == 1: + dictionaries = [dictionaries[0], dictionaries[0]] + self.num_classes = [len(d) for d in dictionaries] + + self.final_proj = nn.Linear(cfg.encoder_embed_dim, final_dim) + self.code_encoder_proj = nn.Linear(cfg.text_transformer.encoder.embed_dim, self.num_classes[-1]) + self.final_proj_list = [self.final_proj, self.code_encoder_proj] + + self.label_embs_concat = nn.Parameter(torch.FloatTensor(self.num_classes[0], final_dim)) + self.label_embs_list = [self.label_embs_concat] + for p in self.label_embs_list: + nn.init.uniform_(p) + + ### build unit encoder: + self.mask_u2t = cfg.mask_u2t + self.add_text_ctc = cfg.add_text_ctc + self.text_ctc_conv_kernel = cfg.text_ctc_conv_kernel + self.padding_idx = unit_dictionary.pad() + self.unit_mask_idx = unit_dictionary.index("") + + self.add_unit_encoder = cfg.add_unit_encoder + self.mix_with_unit = cfg.mix_with_unit + self.use_pred_unit = cfg.use_pred_unit + self.l2_embedding = cfg.l2_embedding + if self.add_unit_encoder: + assert len(unit_dictionary) == self.num_classes[0], f"unit_dictionary: {len(unit_dictionary)}, self.num_classes[0]: {self.num_classes[0]}" + ### build unit pre-net, and shared with hubert label_embs if needed (default: False) + self.unit_embed_tokens = self.build_embedding( + unit_dictionary, + cfg.text_transformer.encoder.embed_dim, + ) + if self.final_dim == cfg.text_transformer.encoder.embed_dim: + logger.info("Share label_embs[0] with unit_embed_tokens ...") + 
nn.init.uniform_(self.unit_embed_tokens.weight) + self.label_embs_list[0] = self.unit_embed_tokens.weight + + ### build unit encoder + self.unit_encoder = TransformerEncoderBase( + cfg.text_transformer, + unit_dictionary, + self.unit_embed_tokens, + use_rel_pos_enc=cfg.use_rel_pos_enc, + scaling_for_att=cfg.scaling_for_att, + ) + + ### build text ctc head + if self.add_text_ctc: + conv = nn.Conv1d( + cfg.text_transformer.encoder.embed_dim, cfg.text_transformer.encoder.embed_dim, + self.text_ctc_conv_kernel, + stride=self.text_ctc_conv_kernel // 2, + bias=False, + padding=self.text_ctc_conv_kernel // 2, + ) + nn.init.kaiming_normal_(conv.weight) + self.unit_encoder_ctc_head = nn.Sequential( + Rotate3D(), + conv, + nn.Dropout(p=0.1), + nn.Sequential( + Rotate3D(), + Rotate3D(), + LayerNorm(cfg.text_transformer.encoder.embed_dim), + ), + nn.GELU(), + nn.Linear(cfg.text_transformer.encoder.embed_dim, len(text_tgt_dictionary)), + ) + + ### build unit2text decoder, not available for now + self.add_decoder = cfg.add_decoder + self.text_transformer_cfg = cfg.text_transformer + if self.add_decoder: + # To make sure that the decoder dict size is the same as the fine-tuning tgt_dict size or bpe code dict size + dec_dictionary = self.cutting_dictionary(text_tgt_dictionary, cfg.decoder_dict_size) + decoder_embed_tokens = self.build_embedding( + dec_dictionary, cfg.text_transformer.decoder.embed_dim + ) + if cfg.reset_decoder_embedding_config: + cfg.text_transformer.no_scale_embedding = False + cfg.text_transformer.layernorm_embedding = False + cfg.text_transformer.no_token_positional_embeddings = False + self.decoder = TransformerDecoderBaseScriptable(cfg.text_transformer, dec_dictionary, decoder_embed_tokens, use_rel_pos_enc=cfg.use_rel_pos_enc) + + + def cutting_dictionary(self, dictionary, dict_size): + if dictionary is None or dict_size <= 0: + return dictionary + else: + import copy + cut_dictionary = copy.deepcopy(dictionary) + if dict_size > len(cut_dictionary): + for i in range(dict_size - len(cut_dictionary)): + cut_dictionary.symbols.append(f'_{i}_') + else: + cut_dictionary.symbols = cut_dictionary.symbols[:dict_size] + return cut_dictionary + + def build_embedding(self, dictionary, embed_dim): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + return Embedding(num_embeddings, embed_dim, padding_idx) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: SpeechutConfig, task: HubertPretrainingTask): + """Build a new model instance.""" + unit_dictionary = getattr(task, "text_src_dictionary", None) + text_tgt_dictionary = getattr(task, "text_dictionary", None) + model = SpeechutModel(cfg, task.cfg, task.dictionaries, unit_dictionary, text_tgt_dictionary) + return model + + def apply_mask(self, x, padding_mask, target_list): + B, T, C = x.shape + if self.mask_prob > 0: + mask_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob, + self.mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x[mask_indices] = self.mask_emb + else: + mask_indices = None + + if self.mask_channel_prob > 0: + mask_channel_indices = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + 
self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x[mask_channel_indices] = 0 + + return x, mask_indices + + def forward_features(self, source: torch.Tensor) -> torch.Tensor: + if self.feature_grad_mult > 0: + features = self.feature_extractor(source) + if self.feature_grad_mult != 1.0: + features = GradMultiply.apply(features, self.feature_grad_mult) + else: + with torch.no_grad(): + features = self.feature_extractor(source) + return features + + def forward_targets( + self, + features: torch.Tensor, + target_list: List[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Trim features to ensure labels exist and then get aligned labels + feat_tsz = features.size(2) + targ_tsz = min([t.size(1) for t in target_list]) + if self.feat2tar_ratio * feat_tsz > targ_tsz: + feat_tsz = int(targ_tsz / self.feat2tar_ratio) + features = features[..., :feat_tsz] + target_inds = torch.arange(feat_tsz).float() * self.feat2tar_ratio + target_inds += np.random.choice(int(self.feat2tar_ratio)) + target_list = [t[:, target_inds.long()] for t in target_list] + return features, target_list + + def forward_padding_mask( + self, + features: torch.Tensor, + padding_mask: torch.Tensor, + ) -> torch.Tensor: + extra = padding_mask.size(1) % features.size(1) + if extra > 0: + padding_mask = padding_mask[:, :-extra] + padding_mask = padding_mask.view(padding_mask.size(0), features.size(1), -1) + padding_mask = padding_mask.all(-1) + return padding_mask + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + lprobs = self.get_normalized_probs_scriptable(net_output, log_probs, sample) + lprobs.batch_first = True + return lprobs + + def downsample_ctc_padding_mask(self, padding_mask): + """ + padding_mask: (B, T) + """ + stride = self.text_ctc_conv_kernel // 2 + return padding_mask[:, ::stride] + + def compute_pred(self, proj_x, label_embs): + if self.target_glu: + label_embs = self.target_glu(label_embs) + x = F.normalize(proj_x.float(), dim=-1) # (S, D) + label_embs = F.normalize(label_embs.float(), dim=-1) # (C, D) + logits = torch.matmul(x, label_embs.T).type_as(proj_x) # (S, C) + logits /= self.logit_temp + return logits + + def compute_hubert_logits(self, x, target, proj, label_embs, padding_mask, mask_indices): + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + proj_x_m = proj(x[masked_indices]) + logit_m_list = [(self.compute_pred(proj_x_m, label_embs), target[masked_indices])] + else: + logit_m_list = [None] + + if not self.skip_nomask: + nomask_indices = torch.logical_and(~padding_mask, ~mask_indices) + proj_x_u = proj(x[nomask_indices]) + logit_u_list = [(self.compute_pred(proj_x_u, label_embs), target[nomask_indices])] + else: + logit_u_list = [None] + + return logit_m_list, logit_u_list + + def compute_ce_logits(self, x, target, proj, padding_mask, mask_indices): + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + logit_m_list = [(proj(x[masked_indices]), target[masked_indices])] + else: + logit_m_list = [None] + + if not self.skip_nomask: + nomask_indices = torch.logical_and(~padding_mask, ~mask_indices) + logit_u_list = [(proj(x[nomask_indices]), target[nomask_indices])] 
+ else: + logit_u_list = [None] + + return logit_m_list, logit_u_list + + def convert_embeddings(self, + x, + padding_mask, + target=None, + mask_indices=None, + mix_with_unit=False, + use_pred_unit=False, + l2_embedding=False, + remask=False + ): + """ + 1. Mix with units if needed (default: True) + 2. Prepare for unit_encoder inputs + Inputs: + x, (B, T, D) + Return: + src_tokens, (B, T) + soft_embeddings, (B, T, D) + l2_loss, a loss + """ + soft_embeddings = self.final_proj_list[0](x) if x.size(-1) == self.final_dim else x + if padding_mask is None: + padding_mask = soft_embeddings.new_zeros(soft_embeddings.size(0), soft_embeddings.size(1), dtype=torch.long) + if use_pred_unit: + src_tokens = self.compute_pred(self.final_proj_list[0](x), self.label_embs_list[0]).argmax(dim=-1) + src_tokens[padding_mask] = self.padding_idx + elif target is not None: + src_tokens = target + else: + src_tokens = padding_mask.long() + + if l2_embedding | mix_with_unit: + unit_embeddings = self.unit_embed_tokens(src_tokens) # (B, T, D) + + l2_loss = 0 + if l2_embedding: + if mask_indices is not None: + l2_loss = (soft_embeddings - unit_embeddings)[mask_indices].float().pow(2).mean(dim=-1) + scale = unit_embeddings[mask_indices].float().pow(2).sum(dim=-1) + else: + l2_loss = (soft_embeddings - unit_embeddings).float().pow(2).mean(dim=-1) + scale = unit_embeddings.float().pow(2).sum(dim=-1) + l2_loss = (l2_loss / scale).mean() + + if mix_with_unit: + B, T, D = x.shape + selected_indices = compute_mask_indices( + (B, T), + padding_mask, + self.mask_prob / 2, + self.mask_length // 2, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + selected_indices = torch.from_numpy(selected_indices).to(x.device) + if mask_indices is not None: + if remask: + remask_indices = torch.logical_and(selected_indices, mask_indices) + soft_embeddings[remask_indices] = self.mask_emb + swap_indices = torch.logical_and(selected_indices, ~mask_indices) + else: + swap_indices = selected_indices + soft_embeddings[swap_indices] = unit_embeddings[swap_indices] + + soft_embeddings = soft_embeddings * (1 - padding_mask.unsqueeze(-1).type_as(x)) + return src_tokens, soft_embeddings, l2_loss + + def forward( + self, + source: torch.Tensor = None, + src_tokens: torch.Tensor = None, + src_lengths: torch.Tensor = None, + prev_output_tokens: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + assert source is not None or src_tokens is not None + if source is not None: + return self.forward_speech( + source=source, + target_list=target_list, + padding_mask=padding_mask, + mask=mask, + features_only=features_only, + output_layer=output_layer, + ) + else: + return self.forward_text( + src_tokens=src_tokens, + src_lengths=src_lengths, + prev_output_tokens=prev_output_tokens, + mask=self.mask_u2t, + features_only=features_only, + output_layer=output_layer, + ) + + def forward_speech( + self, + source: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + """output layer is 1-based""" + features = self.forward_features(source) + if target_list is not None: + features, target_list = 
self.forward_targets(features, target_list) + + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) + features = self.layer_norm(features) + unmasked_features = features.clone() + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = self.dropout_input(features) + unmasked_features = self.dropout_features(unmasked_features) + + if mask: + x, mask_indices = self.apply_mask(features, padding_mask, target_list) + else: + x = features + mask_indices = None + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, _ = self.encoder( + x, + padding_mask=padding_mask, + layer=None if output_layer is None else output_layer - 1, + ) + + if features_only: + return {"x": x, "padding_mask": padding_mask, "features": features} + + logit_m_list, logit_u_list = self.compute_hubert_logits( + x, + target_list[0], + self.final_proj_list[0], + self.label_embs_list[0], + padding_mask, + mask_indices, + ) + + result = { + "logit_m_list": logit_m_list, + "logit_u_list": logit_u_list, + "padding_mask": padding_mask, + "features_pen": features_pen, + } + + if self.add_unit_encoder: + src_tokens, x_emb, l2_loss = self.convert_embeddings( + x, + padding_mask, target_list[0], + mask_indices=mask_indices, + mix_with_unit=self.mix_with_unit, + use_pred_unit=self.use_pred_unit, + l2_embedding=self.l2_embedding, + ) + encoder_out = self.unit_encoder(src_tokens, token_embeddings=x_emb) + + result['encoder_out'] = encoder_out['encoder_out'] # [(T, B, D)] + result['encoder_padding_mask'] = encoder_out['encoder_padding_mask'] # [(B, T)] + if self.l2_embedding: + result['embedding_l2_loss'] = l2_loss + + code_logit_m_list, code_logit_u_list = self.compute_ce_logits( + encoder_out['encoder_out'][0].transpose(0, 1), # -> (B, T, C) + target_list[-1], + self.final_proj_list[1], + padding_mask, + mask_indices, + ) + result['logit_m_list'] += code_logit_m_list + result['logit_u_list'] += code_logit_u_list + return result + + def forward_text( + self, + src_tokens: torch.Tensor = None, + src_lengths: torch.Tensor = None, + prev_output_tokens: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + ) -> Dict[str, torch.Tensor]: + assert self.add_unit_encoder, f"Can not forward unit-text branch without unit_encoder!" 
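+        # Rough usage sketch (comment only; variable names are assumptions): given unit ids
+        #   src_tokens: (B, T) indices from the unit dictionary,
+        # the units are embedded, optionally masked (mask_u2t), and fed to the shared
+        # unit_encoder; with add_decoder / add_text_ctc enabled the result also carries
+        #   out = model(src_tokens=src_tokens, src_lengths=lengths, prev_output_tokens=prev)
+        #   out["decoder_out"]       # seq2seq logits for the unit-to-text task
+        #   out["encoder_out_ctc"]   # list of CTC logits from unit_encoder_ctc_head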
+ + padding_mask = src_tokens == self.padding_idx + unit_embeddings = self.unit_embed_tokens(src_tokens) + if mask: + unit_embeddings, mask_indices = self.apply_mask(unit_embeddings, padding_mask, [src_tokens]) + + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=unit_embeddings, + return_all_hiddens=output_layer is not None, + ) + + result = {} + result["encoder_out"] = encoder_out["encoder_out"] + result["encoder_states"] = encoder_out["encoder_states"] + result["padding_mask"] = padding_mask + + if self.add_text_ctc: + result["encoder_out_ctc"] = [self.unit_encoder_ctc_head(x) for x in encoder_out['encoder_out']] + result["encoder_padding_mask"] = [ + self.downsample_ctc_padding_mask(padding_mask) for padding_mask in encoder_out['encoder_padding_mask'] + ] + + if features_only: + return result + if self.add_decoder: + assert prev_output_tokens is not None + decoder_out = self.decoder( + prev_output_tokens=prev_output_tokens, encoder_out=encoder_out, + ) + result['decoder_out'] = decoder_out + return result + + def forward_mum(self, src_tokens, target, mask=True): + target_list = [target] + padding_mask = src_tokens.eq(self.unit_encoder.padding_idx) + unit_embeddings = self.unit_embed_tokens(src_tokens) + if mask: + unit_embeddings, mask_indices = self.apply_mask(unit_embeddings, padding_mask, target_list) + else: + ### If already applied mask on src_tokens, then the target_list should contains many padding_idx + mask_indices = target_list[-1] != self.padding_idx + unit_embeddings[mask_indices] = self.mask_emb + + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=unit_embeddings, + ) + code_logit_m_list, code_logit_u_list = self.compute_ce_logits( + encoder_out["encoder_out"][0].transpose(0, 1), + target_list[-1], + self.final_proj_list[1], + padding_mask, + mask_indices, + ) + result = {} + result["logit_m_list"] = code_logit_m_list + result["logit_u_list"] = code_logit_u_list + result["padding_mask"] = padding_mask + return result + + def extract_features( + self, + source: torch.Tensor, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = False, + ret_conv: bool = False, + output_layer: Optional[int] = None, + **kwargs, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Extract encoder features for only speech input""" + res = self.forward( + source, + padding_mask=padding_mask, + mask=mask, + features_only=True, + output_layer=output_layer, + ) + x = res["x"] # B x T x D + padding_mask = res["padding_mask"] + + if self.add_unit_encoder: + src_tokens, x, _ = self.convert_embeddings( + x, + padding_mask, + mix_with_unit=False, + use_pred_unit=False, + ) + encoder_out = self.unit_encoder( + src_tokens, + token_embeddings=x, + return_all_hiddens=output_layer is not None + ) + res["x"] = encoder_out['encoder_out'][0].transpose(0, 1) # (B, T, D) + + feature = res["features"] if ret_conv else res["x"] + if output_layer is not None: + feature = encoder_out['encoder_states'] + + return feature, padding_mask + + def get_logits(self, net_output, is_masked=True): + if is_masked: + logits_list = net_output["logit_m_list"] + else: + logits_list = net_output["logit_u_list"] + logits_list = [x[0].float() for x in logits_list if x is not None] + return logits_list + + def get_targets(self, net_output, is_masked=True): + if is_masked: + logits_list = net_output["logit_m_list"] + else: + logits_list = net_output["logit_u_list"] + targets_list = [x[1].long() for x in logits_list if x is not None] + return targets_list + + def get_extra_losses(self, net_output): + 
extra_losses = [] + names = [] + + if "features_pen" in net_output: + extra_losses.append(net_output["features_pen"]) + names.append("features_pen") + + if "embedding_l2_loss" in net_output: + extra_losses.append(net_output["embedding_l2_loss"]) + names.append("embedding_l2_loss") + + return extra_losses, names + + def remove_pretraining_modules(self, step2=False): + self.target_glu = None + + def load_checkpoint(self, checkpoint: str): + if not PathManager.exists(checkpoint): + raise IOError("Model file not found: {}".format(checkpoint)) + state = checkpoint_utils.load_checkpoint_to_cpu(checkpoint) + return state + +class Rotate3D(nn.Module): + """ + (T, B, D) --> (B, D, T) --> (D, T, B) --> (T, B, D) + """ + def __init__(self): + super().__init__() + + def forward(self, x): + return x.permute(1, 2, 0) diff --git a/SpeechT5/SpeechUT/speechut/models/speechut_asr.py b/SpeechT5/SpeechUT/speechut/models/speechut_asr.py new file mode 100644 index 0000000000000000000000000000000000000000..f9ec9d8488b4f7e552804d355de000c80fb35b78 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/models/speechut_asr.py @@ -0,0 +1,165 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import contextlib +import torch +from dataclasses import dataclass, field +from fairseq import utils +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.fairseq_encoder import FairseqEncoder +from fairseq.models.hubert import HubertAsrConfig, HubertEncoder +from fairseq.tasks import FairseqTask + +@dataclass +class SpeechUTASRConfig(HubertAsrConfig): + add_decoder: bool = field( + default=True, + metadata={"help": "add decoder for fine-tune"}, + ) + +@register_model("speechut_asr", dataclass=SpeechUTASRConfig) +class SpeechUTASR(BaseFairseqModel): + """ + A encoder-ctc-decoder model if cfg.add_decoder is True, or a encoder-ctc model + """ + def __init__(self, cfg: SpeechUTASRConfig, encoder: FairseqEncoder): + super().__init__() + self.cfg = cfg + self.encoder = encoder + if not cfg.add_decoder: + self.encoder.w2v_model.decoder = None + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: SpeechUTASRConfig, task: FairseqTask): + """Build a new model instance.""" + encoder = SpeechUTEncoder(cfg, task) + return cls(cfg, encoder) + + def forward(self, source, padding_mask, prev_output_tokens, **kwargs): + encoder_out = self.encoder(source, padding_mask, **kwargs) + + x = self.encoder.final_dropout(encoder_out['encoder_out'][0]) # (T, B, C) + if self.encoder.proj: + x = self.encoder.proj(x) + if self.encoder.conv_ctc_proj: + padding_mask = self.encoder.w2v_model.downsample_ctc_padding_mask(encoder_out["encoder_padding_mask"][0]) + else: + padding_mask = encoder_out["encoder_padding_mask"] + + decoder_out = self.decoder( + prev_output_tokens, encoder_out=encoder_out, **kwargs + ) if self.cfg.add_decoder else None + + return { + "encoder_out_ctc": x, # (T, B, C), 
for CTC loss + "padding_mask": padding_mask, # (B, T), for CTC loss + "decoder_out": decoder_out, # for ED loss + } + + def forward_decoder(self, prev_output_tokens, **kwargs): + return self.decoder(prev_output_tokens, **kwargs) + + def get_logits(self, net_output): + """For CTC decoding""" + logits = net_output["encoder_out"] + padding = net_output["encoder_padding_mask"] + if padding is not None and padding.any(): + padding = padding.T + logits[padding][..., 0] = 0 + logits[padding][..., 1:] = float("-inf") + + return logits + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """For 1) computing CTC loss, 2) decoder decoding.""" + + if "encoder_out_ctc" in net_output: + logits = net_output["encoder_out_ctc"] + else: + return self.decoder.get_normalized_probs(net_output, log_probs, sample) + + if isinstance(logits, list): + logits = logits[0] + + if log_probs: + return utils.log_softmax(logits.float(), dim=-1) + else: + return utils.softmax(logits.float(), dim=-1) + + @property + def decoder(self): + return self.encoder.w2v_model.decoder + + +class SpeechUTEncoder(HubertEncoder): + """ + Modified from fairseq.models.hubert.hubert_asr.HubertEncoder + 1. make it compatible with encoder-decoder model + """ + def __init__(self, cfg: HubertAsrConfig, task): + super().__init__(cfg, task) + + if (task.target_dictionary is not None) and ( + hasattr(self.w2v_model, "unit_encoder_ctc_head") + ): + self.proj = self.w2v_model.unit_encoder_ctc_head + self.conv_ctc_proj = True + else: + self.conv_ctc_proj = False + + def forward(self, source, padding_mask, tbc=True, **kwargs): + w2v_args = { + "source": source, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + } + ft = self.freeze_finetune_updates <= self.num_updates + with torch.no_grad() if not ft else contextlib.ExitStack(): + x, padding_mask = self.w2v_model.extract_features(**w2v_args) + if tbc: + # B x T x C -> T x B x C + x = x.transpose(0, 1) + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [padding_mask], # B x T + } + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. + + Forward the encoder out. 
+ """ + x, padding_mask = self.w2v_model.extract_features(**net_input, mask=False) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + encoder_out = { + "encoder_out" : [x], + "encoder_padding_mask" : [padding_mask], + } + if self.proj: + x = self.proj(x) + encoder_out["encoder_out_ctc"] = x + + return encoder_out + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + encoder_out["encoder_out"] = [ + x.index_select(1, new_order) for x in encoder_out["encoder_out"] + ] + if encoder_out["encoder_padding_mask"] is not None: + encoder_out["encoder_padding_mask"] = [ + x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"] + ] + return encoder_out diff --git a/SpeechT5/SpeechUT/speechut/models/speechut_st.py b/SpeechT5/SpeechUT/speechut/models/speechut_st.py new file mode 100644 index 0000000000000000000000000000000000000000..6faaccfc89748a2692bd1eaec200588449d10423 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/models/speechut_st.py @@ -0,0 +1,221 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import contextlib +import torch +import torch.nn as nn +from argparse import Namespace +from dataclasses import dataclass +from typing import Any +from fairseq import checkpoint_utils, tasks +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.fairseq_encoder import FairseqEncoder +from fairseq.tasks import FairseqTask +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.data.data_utils import lengths_to_padding_mask + +from fairseq.models.hubert import HubertAsrConfig + +logger = logging.getLogger(__name__) + +@dataclass +class SpeechUTS2TConfig(HubertAsrConfig): + ### the following config is only for the compatibility to fairseq speech_to_text task + input_feat_per_channel: Any = None + input_channels: Any = None + speaker_to_id: Any = None + +@register_model("speechut_st_legacy", dataclass=SpeechUTS2TConfig) +class SpeechUTS2T(BaseFairseqModel): + """An encoder-decoder model.""" + def __init__(self, cfg: SpeechUTS2TConfig, encoder: FairseqEncoder): + super().__init__() + self.cfg = cfg + self.encoder = encoder + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: SpeechUTS2TConfig, task: FairseqTask): + """Build a new model instance.""" + encoder = SpeechUTEncoder(cfg, task) + return cls(cfg, encoder) + + def forward(self, src_tokens, src_lengths, prev_output_tokens, **kwargs): + encoder_out = self.encoder(src_tokens, src_lengths, **kwargs) + decoder_out = self.encoder.w2v_model.decoder( + prev_output_tokens, encoder_out=encoder_out, **kwargs + ) + return decoder_out + + def forward_decoder(self, prev_output_tokens, **kwargs): + return self.encoder.w2v_model.decoder(prev_output_tokens, **kwargs) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """For decoder decoding.""" + 
return self.encoder.w2v_model.decoder.get_normalized_probs(net_output, log_probs, sample) + + @property + def decoder(self): + return self.encoder.w2v_model.decoder + + +class SpeechUTEncoder(FairseqEncoder): + """ + Modified from fairseq.models.hubert.hubert_asr.HubertEncoder + 1. make it compatible with fairseq speech_to_text task + 2. make it compatible with encoder-decoder model + """ + def __init__(self, cfg: SpeechUTS2TConfig, task): + self.apply_mask = cfg.apply_mask + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + } + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + assert task.data_cfg.standardize_audio() == w2v_args.task.normalize, ( + "Fine-tuning works best when data normalization is the same. " + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + pretrain_task = tasks.setup_task(w2v_args.task, load_local_states=False) + assert state is not None and "task_state" in state, f"the stored dictionaries not found in checkpoint!" + # This will load the stored "dictionaries" object + pretrain_task.load_state_dict(state["task_state"]) + + model = pretrain_task.build_model(w2v_args.model, from_checkpoint=True) + if state is not None and not cfg.no_pretrained_weights: + try: + model.load_state_dict(state["model"], strict=True) + except Exception as e: + logger.warn(e) + model.load_state_dict(state["model"], strict=False) + + model.remove_pretraining_modules() + + super().__init__(pretrain_task.source_dictionary) + + d = w2v_args.model.encoder_embed_dim + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.num_updates = 0 + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, src_tokens=None, src_lengths=None, **kwargs): + + w2v_args = { + "source": src_tokens, + "padding_mask": lengths_to_padding_mask(src_lengths), + "mask": self.apply_mask and self.training, + } + + ft = self.freeze_finetune_updates <= self.num_updates + + with torch.no_grad() if not ft else contextlib.ExitStack(): + x, padding_mask = self.w2v_model.extract_features(**w2v_args) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [padding_mask], # B x T + "padding_mask": [padding_mask], + } + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. 
+ + Forward the encoder out. + """ + _net_input = { + "source": net_input["src_tokens"], + "padding_mask": lengths_to_padding_mask(net_input["src_lengths"]), + "mask": False, + } + + x, padding_mask = self.w2v_model.extract_features(**_net_input) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + encoder_out = { + "encoder_out" : [x], + "encoder_padding_mask" : [padding_mask], + } + return encoder_out + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + encoder_out["encoder_out"] = [ + x.index_select(1, new_order) for x in encoder_out["encoder_out"] + ] + if encoder_out["encoder_padding_mask"] is not None: + encoder_out["encoder_padding_mask"] = [ + x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"] + ] + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git a/SpeechT5/SpeechUT/speechut/models/t5_transformer_lm.py b/SpeechT5/SpeechUT/speechut/models/t5_transformer_lm.py new file mode 100644 index 0000000000000000000000000000000000000000..3d16a2df00b692114f8d84d254cf486d09e1137b --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/models/t5_transformer_lm.py @@ -0,0 +1,25 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +from fairseq.models import ( + register_model_architecture, +) +from fairseq.models.transformer_lm import base_lm_architecture + + +@register_model_architecture(model_name="transformer_lm", arch_name="transformer_lm_t5") +def transformer_lm_t5(args): + args.decoder_embed_dim = getattr(args, "decoder_embed_dim", 1280) + args.decoder_ffn_embed_dim = getattr(args, "decoder_ffn_embed_dim", 6144) + args.decoder_layers = getattr(args, "decoder_layers", 20) + args.decoder_attention_heads = getattr(args, "decoder_attention_heads", 16) + args.dropout = getattr(args, "dropout", 0.1) + args.attention_dropout = getattr(args, "attention_dropout", 0.1) + args.activation_fn = getattr(args, "activation_fn", "gelu") + base_lm_architecture(args) diff --git a/SpeechT5/SpeechUT/speechut/modules/__init__.py b/SpeechT5/SpeechUT/speechut/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dad97814e515d8e68d68e4e031d4f9c9055f3864 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/modules/__init__.py @@ -0,0 +1,27 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# 
-------------------------------------------------------- + +from .learned_positional_embedding import LearnedPositionalEmbedding +from .multihead_attention import MultiheadAttention +from .relative_pos_enc import RelativePositionalEncoding +from .transformer_layer import TransformerEncoderLayerBase, TransformerDecoderLayerBase +from .w2v_encoder import TransformerEncoder, TransformerSentenceEncoderLayer +from .transformer_encoder import TransformerEncoderBase +from .transformer_decoder import TransformerDecoderScriptable, TransformerDecoderBaseScriptable + +__all__ = [ + "MultiheadAttention", + "RelativePositionalEncoding", + "LearnedPositionalEmbedding", + "TransformerEncoderLayerBase", + "TransformerDecoderLayerBase", + "TransformerEncoder", + "TransformerSentenceEncoderLayer", + "TransformerEncoderBase", + "TransformerDecoderScriptable", + "TransformerDecoderBaseScriptable", +] diff --git a/SpeechT5/SpeechUT/speechut/modules/ctc_prefix_score.py b/SpeechT5/SpeechUT/speechut/modules/ctc_prefix_score.py new file mode 100644 index 0000000000000000000000000000000000000000..b42cbd819abf7bdd718bef3db3f553c8360ac384 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/modules/ctc_prefix_score.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Mitsubishi Electric Research Labs (Takaaki Hori) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import numpy as np +import six + + +class CTCPrefixScore(object): + """Compute CTC label sequence scores + which is based on Algorithm 2 in WATANABE et al. + "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION," + but extended to efficiently compute the probablities of multiple labels + simultaneously + """ + + def __init__(self, x, blank, eos, xp): + self.xp = xp + self.logzero = -10000000000.0 + self.blank = blank + self.eos = eos + self.input_length = len(x) + self.x = x + + def initial_state(self): + """Obtain an initial CTC state + :return: CTC state + """ + # initial CTC state is made of a frame x 2 tensor that corresponds to + # r_t^n() and r_t^b(), where 0 and 1 of axis=1 represent + # superscripts n and b (non-blank and blank), respectively. + r = self.xp.full((self.input_length, 2), self.logzero, dtype=np.float32) + r[0, 1] = self.x[0, self.blank] + for i in six.moves.range(1, self.input_length): + r[i, 1] = r[i - 1, 1] + self.x[i, self.blank] + return r + + def __call__(self, y, cs, r_prev): + """Compute CTC prefix scores for next labels + :param y : prefix label sequence + :param cs : array of next labels + :param r_prev: previous CTC state + :return ctc_scores, ctc_states + """ + # initialize CTC states + output_length = len(y) - 1 # ignore sos + # new CTC states are prepared as a frame x (n or b) x n_labels tensor + # that corresponds to r_t^n(h) and r_t^b(h). 
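+        # Shape note (descriptive comment): r is (input_length, 2, len(cs)); r[t, 0, j] and
+        # r[t, 1, j] hold the log forward probabilities of the prefix y extended by cs[j]
+        # and ending at frame t with a non-blank / blank label, while log_psi (computed
+        # below) is the log prefix probability that is typically combined with the
+        # attention decoder score during joint CTC/attention decoding.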
+ r = self.xp.ndarray((self.input_length, 2, len(cs)), dtype=np.float32) + xs = self.x[:, cs] + if output_length == 0: + r[0, 0] = xs[0] + r[0, 1] = self.logzero + else: + r[output_length - 1] = self.logzero + + # prepare forward probabilities for the last label + r_sum = self.xp.logaddexp( + r_prev[:, 0], r_prev[:, 1] + ) # log(r_t^n(g) + r_t^b(g)) + last = y[-1] + if output_length > 0 and last in cs: + log_phi = self.xp.ndarray((self.input_length, len(cs)), dtype=np.float32) + for i in six.moves.range(len(cs)): + log_phi[:, i] = r_sum if cs[i] != last else r_prev[:, 1] + else: + log_phi = r_sum + + # compute forward probabilities log(r_t^n(h)), log(r_t^b(h)), + # and log prefix probabilities log(psi) + start = max(output_length, 1) + log_psi = r[start - 1, 0] + for t in six.moves.range(start, self.input_length): + r[t, 0] = self.xp.logaddexp(r[t - 1, 0], log_phi[t - 1]) + xs[t] + r[t, 1] = ( + self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) + self.x[t, self.blank] + ) + log_psi = self.xp.logaddexp(log_psi, log_phi[t - 1] + xs[t]) + + # get P(...eos|X) that ends with the prefix itself + eos_pos = self.xp.where(cs == self.eos)[0] + if len(eos_pos) > 0: + log_psi[eos_pos] = r_sum[-1] # log(r_T^n(g) + r_T^b(g)) + + # exclude blank probs + blank_pos = self.xp.where(cs == self.blank)[0] + if len(blank_pos) > 0: + log_psi[blank_pos] = self.logzero + + # return the log prefix probability and CTC states, where the label axis + # of the CTC states is moved to the first axis to slice it easily + return log_psi, self.xp.rollaxis(r, 2) diff --git a/SpeechT5/SpeechUT/speechut/modules/learned_positional_embedding.py b/SpeechT5/SpeechUT/speechut/modules/learned_positional_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..20c8558e20b2172a8c607e2f5c32aa146ff2b9cf --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/modules/learned_positional_embedding.py @@ -0,0 +1,69 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/modules/learned_positional_embedding.py + 1. Add clamping if the input length exceeds the max-source-tokens +""" + +from typing import Dict, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from torch import Tensor + + +class LearnedPositionalEmbedding(nn.Embedding): + """ + This module learns positional embeddings up to a fixed maximum size. + Padding ids are ignored by either offsetting based on padding_idx + or by setting padding_idx to None and ensuring that the appropriate + position ids are passed to the forward function. 
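+
+    This variant additionally clamps the computed position ids (see the torch.clamp call
+    in forward), so inputs longer than the number of learned positions reuse the last
+    position embedding instead of indexing out of range; shorter inputs are unaffected.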
+ """ + + def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: int): + super().__init__(num_embeddings, embedding_dim, padding_idx) + self.onnx_trace = False + if self.padding_idx is not None: + self.max_positions = self.num_embeddings - self.padding_idx - 1 + else: + self.max_positions = self.num_embeddings + + def forward( + self, + input: Tensor, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + positions: Optional[Tensor] = None, + ): + """Input is expected to be of size [bsz x seqlen].""" + assert (positions is None) or ( + self.padding_idx is None + ), "If positions is pre-computed then padding_idx should not be set." + + if positions is None: + if incremental_state is not None: + # positions is the same for every token when decoding a single step + # Without the int() cast, it doesn't work in some cases when exporting to ONNX + positions = torch.zeros( + (1, 1), device=input.device, dtype=input.dtype + ).fill_(int(self.padding_idx + input.size(1))) + else: + positions = utils.make_positions( + input, self.padding_idx, onnx_trace=self.onnx_trace + ) + positions = torch.clamp(positions, max=self.padding_idx + self.max_positions) + return F.embedding( + positions, + self.weight, + self.padding_idx, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.sparse, + ) diff --git a/SpeechT5/SpeechUT/speechut/modules/multihead_attention.py b/SpeechT5/SpeechUT/speechut/modules/multihead_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..89f46ab628ebe7faa1a3db2fd4f31a7269bb006a --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/modules/multihead_attention.py @@ -0,0 +1,346 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +from typing import Dict, Optional, Tuple + +import torch +import torch.nn.functional as F +from fairseq import utils +from torch import Tensor + +from fairseq.modules import MultiheadAttention as FairseqMultiheadAttention + + +class MultiheadAttention(FairseqMultiheadAttention): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. + """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + q_noise=0.0, + qn_block_size=8, + scaling_for_att=1.0 + ): + super().__init__( + embed_dim, + num_heads, + kdim, + vdim, + dropout, + bias, + add_bias_kv, + add_zero_attn, + self_attention, + encoder_decoder_attention, + q_noise, + qn_block_size, + ) + self.scaling_for_att = scaling_for_att + + def forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + need_weights: bool = True, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + before_softmax: bool = False, + need_head_weights: bool = False, + position_bias: Optional[Tensor] = None, + ) -> Tuple[Tensor, Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. 
+ need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). + before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. + """ + if need_head_weights: + need_weights = True + + is_tpu = query.device.type == "xla" + + tgt_len, bsz, embed_dim = query.size() + src_len = tgt_len + assert embed_dim == self.embed_dim, f"query dim {embed_dim} != {self.embed_dim}" + assert list(query.size()) == [tgt_len, bsz, embed_dim] + if key is not None: + src_len, key_bsz, _ = key.size() + if not torch.jit.is_scripting(): + assert key_bsz == bsz + assert value is not None + assert src_len, bsz == value.shape[:2] + + if ( + not self.onnx_trace + and not is_tpu # don't use PyTorch version on TPUs + and incremental_state is None + and not static_kv + # A workaround for quantization to work. Otherwise JIT compilation + # treats bias in linear module as method. + and not torch.jit.is_scripting() + and position_bias is None + ): + assert key is not None and value is not None + return F.multi_head_attention_forward( + query, + key, + value, + self.embed_dim, + self.num_heads, + torch.empty([0]), + torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), + self.bias_k, + self.bias_v, + self.add_zero_attn, + self.dropout_module.p, + self.out_proj.weight, + self.out_proj.bias, + self.training or self.dropout_module.apply_during_inference, + key_padding_mask, + need_weights, + attn_mask, + use_separate_proj_weight=True, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + ) + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + q *= (1 / self.scaling_for_att) + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + key_padding_mask.new_zeros(key_padding_mask.size(0), 1), + ], + dim=1, + ) + + q = ( + q.contiguous() + .view(tgt_len, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if k is not None: + k = ( + k.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if v is not None: + v = ( + 
v.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + assert k is not None + k = torch.cat([prev_key, k], dim=1) + src_len = k.size(1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + + saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + assert k.size(1) == src_len + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. + if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + assert v is not None + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + torch.zeros(key_padding_mask.size(0), 1).type_as( + key_padding_mask + ), + ], + dim=1, + ) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + if position_bias is not None: ## first order + ## position_bias: [241, 241, 64] + #print ("attn_weights: ", attn_weights.size()) # [492, 241, 241] + reshape_q = q.contiguous().view(bsz * self.num_heads, -1, self.head_dim).transpose(0,1) #[241, 492, 64] + #print ("reshape_q: ", reshape_q.size()) + B = torch.matmul(reshape_q, position_bias.transpose(-2, -1)) + #print ("B: ", B.size()) ## [241, 492, 241] + #B = B.transpose(0, 1).view(bsz, self.num_heads, position_bias.size(0), position_bias.size(1)) + B = B.transpose(0, 1).view(bsz*self.num_heads, position_bias.size(0), position_bias.size(1)) + #print ("B 2: ", B.size()) + attn_weights += B + + attn_weights *= self.scaling_for_att + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if 
key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if not is_tpu: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if self.scaling_for_att > 1.0: + attn_weights = attn_weights - attn_weights.detach().max(dim=-1, keepdim=True)[0] + + if before_softmax: + return attn_weights, v + + attn_weights_float = utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace + ) + attn_weights = attn_weights_float.type_as(attn_weights) + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + if self.onnx_trace and attn.size(1) == 1: + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, embed_dim) + else: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn = self.out_proj(attn) + attn_weights: Optional[Tensor] = None + if need_weights: + attn_weights = attn_weights_float.view( + bsz, self.num_heads, tgt_len, src_len + ).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + + return attn, attn_weights diff --git a/SpeechT5/SpeechUT/speechut/modules/relative_pos_enc.py b/SpeechT5/SpeechUT/speechut/modules/relative_pos_enc.py new file mode 100644 index 0000000000000000000000000000000000000000..7021fc0941fef310ca5571c101b8a8e18ffc1db6 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/modules/relative_pos_enc.py @@ -0,0 +1,33 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import torch + +class RelativePositionalEncoding(torch.nn.Module): + def __init__(self, d_model, maxlen=1000, embed_v=False): + super(RelativePositionalEncoding, self).__init__() + + self.d_model = d_model + self.maxlen = maxlen + self.pe_k = torch.nn.Embedding(2*maxlen, d_model) + if embed_v: + self.pe_v = torch.nn.Embedding(2*maxlen, d_model) + self.embed_v = embed_v + + + def forward(self, pos_seq, incremental_state=None): + pos_seq[pos_seq < -self.maxlen] = -self.maxlen + pos_seq[pos_seq >= self.maxlen] = self.maxlen - 1 + pos_seq = pos_seq + self.maxlen + + if incremental_state is not None: + pos_seq = pos_seq[-1:] + + if self.embed_v: + return self.pe_k(pos_seq), self.pe_v(pos_seq) + else: + return self.pe_k(pos_seq), None diff --git a/SpeechT5/SpeechUT/speechut/modules/transformer_decoder.py b/SpeechT5/SpeechUT/speechut/modules/transformer_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..84417b44b2672e49cf92bad8355d2dae48661b55 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/modules/transformer_decoder.py @@ -0,0 +1,543 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code 
bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/transformer/transformer_decoder.py +""" + +import math +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqIncrementalDecoder +from fairseq.models.transformer import TransformerConfig +from fairseq.modules import ( + AdaptiveSoftmax, + BaseLayer, + FairseqDropout, + LayerDropModuleList, + LayerNorm, + PositionalEmbedding, + SinusoidalPositionalEmbedding, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor + +from speechut.modules import transformer_layer +from speechut.modules import RelativePositionalEncoding + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerDecoderBase": + return "TransformerDecoder" + else: + return module_name + + +class TransformerDecoderBase(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *cfg.decoder.layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, + cfg, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + use_rel_pos_enc=False, + ): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + self._future_mask = torch.empty(0) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.decoder_layerdrop = cfg.decoder.layerdrop + self.share_input_output_embed = cfg.share_decoder_input_output_embed + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = cfg.decoder.embed_dim + self.embed_dim = embed_dim + self.output_embed_dim = cfg.decoder.output_dim + + self.padding_idx = embed_tokens.padding_idx + self.max_target_positions = cfg.max_target_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + self.embed_positions = ( + PositionalEmbedding( + self.max_target_positions, + embed_dim, + self.padding_idx, + learned=cfg.decoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + self.cross_self_attention = cfg.cross_self_attention + + if self.decoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.use_rel_pos_enc = use_rel_pos_enc + 
self.layers.extend( + [ + self.build_decoder_layer(cfg, no_encoder_attn) + for _ in range(cfg.decoder.layers) + ] + ) + self.num_layers = len(self.layers) + + if cfg.decoder.normalize_before and not cfg.no_decoder_final_norm: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + + self.project_out_dim = ( + Linear(embed_dim, self.output_embed_dim, bias=False) + if embed_dim != self.output_embed_dim and not cfg.tie_adaptive_weights + else None + ) + + self.adaptive_softmax = None + self.output_projection = output_projection + if self.output_projection is None: + self.build_output_projection(cfg, dictionary, embed_tokens) + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(embed_dim // cfg.decoder.attention_heads, 24) + + def build_output_projection(self, cfg, dictionary, embed_tokens): + if cfg.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + utils.eval_str_list(cfg.adaptive_softmax_cutoff, type=int), + dropout=cfg.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if cfg.tie_adaptive_weights else None, + factor=cfg.adaptive_softmax_factor, + tie_proj=cfg.tie_adaptive_proj, + ) + elif self.share_input_output_embed: + self.output_projection = nn.Linear( + self.embed_tokens.weight.shape[1], + self.embed_tokens.weight.shape[0], + bias=False, + ) + self.output_projection.weight = self.embed_tokens.weight + else: + self.output_projection = nn.Linear( + self.output_embed_dim, len(dictionary), bias=False + ) + nn.init.normal_( + self.output_projection.weight, mean=0, std=self.output_embed_dim ** -0.5 + ) + num_base_layers = cfg.base_layers + for i in range(num_base_layers): + self.layers.insert( + ((i + 1) * cfg.decoder.layers) // (num_base_layers + 1), + BaseLayer(cfg), + ) + + def build_decoder_layer(self, cfg, no_encoder_attn=False): + layer = transformer_layer.TransformerDecoderLayerBase(cfg, no_encoder_attn, has_relative_attention_bias=self.use_rel_pos_enc) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention, should be of size T x B x C + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). 
+ + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + ) + + if not features_only: + x = self.output_layer(x) + return x, extra + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + return self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + + """ + A scriptable subclass of this class has an extract_features method and calls + super().extract_features, but super() is not supported in torchscript. A copy of + this function is made to be used in the subclass instead. + """ + + def extract_features_scriptable( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + """ + Similar to *forward* but only return features. + + Includes several features from "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + alignment_layer (int, optional): return mean alignment over + heads at this layer (default: last layer). + alignment_heads (int, optional): only average alignment over + this many heads (default: all heads). 
+ + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + bs, slen = prev_output_tokens.size() + if alignment_layer is None: + alignment_layer = self.num_layers - 1 + + enc: Optional[Tensor] = None + padding_mask: Optional[Tensor] = None + if encoder_out is not None and len(encoder_out["encoder_out"]) > 0: + enc = encoder_out["encoder_out"][0] + assert ( + enc.size()[1] == bs + ), f"Expected enc.shape == (t, {bs}, c) got {enc.shape}" + if encoder_out is not None and len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + + # embed positions + positions = None + if self.embed_positions is not None: + positions = self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.quant_noise is not None: + x = self.quant_noise(x) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + pos_seq = torch.arange(0, slen).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, _ = self.pos_emb(pos_seq, incremental_state) + else: + pos_k = None + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any(): + self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx) + + # decoder layers + attn: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, _ = layer( + x, + enc, + padding_mask, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer)), + need_head_weights=bool((idx == alignment_layer)), + pos_bias=pos_k, + ) + inner_states.append(x) + if layer_attn is not None and idx == alignment_layer: + attn = layer_attn.float().to(x) + + if attn is not None: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": [attn], "inner_states": inner_states} + + def output_layer(self, features): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + return self.output_projection(features) + else: + return features + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embed_positions is None: + return self.max_target_positions + return min(self.max_target_positions, self.embed_positions.max_positions) + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround. 
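+        # Illustrative note: the mask cached below is the standard causal
+        # (upper-triangular) mask, so decoding step i can only attend to
+        # steps <= i. For dim == 3 it is
+        #     [[0., -inf, -inf],
+        #      [0.,   0., -inf],
+        #      [0.,   0.,   0.]]
+        # and it is rebuilt lazily whenever the cached tensor is empty, lives
+        # on the wrong device, or is smaller than the current sequence length.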
+ if ( + self._future_mask.size(0) == 0 + or (not self._future_mask.device == tensor.device) + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(torch.zeros([dim, dim])), 1 + ) + self._future_mask = self._future_mask.to(tensor) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + + if f"{name}.output_projection.weight" not in state_dict: + if self.share_input_output_embed: + embed_out_key = f"{name}.embed_tokens.weight" + else: + embed_out_key = f"{name}.embed_out" + if embed_out_key in state_dict: + state_dict[f"{name}.output_projection.weight"] = state_dict[ + embed_out_key + ] + if not self.share_input_output_embed: + del state_dict[embed_out_key] + + for i in range(self.num_layers): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m + +class TransformerDecoderBaseScriptable(TransformerDecoderBase): + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + return x, None + + +class TransformerDecoder(TransformerDecoderBase): + def __init__( + self, + args, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + ): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + no_encoder_attn=no_encoder_attn, + output_projection=output_projection, + use_rel_pos_enc=getattr(args, "use_rel_pos_enc", False), + ) + + def build_output_projection(self, args, dictionary, embed_tokens): + super().build_output_projection( + TransformerConfig.from_namespace(args), dictionary, embed_tokens + ) + + def build_decoder_layer(self, args, no_encoder_attn=False): + return super().build_decoder_layer( + TransformerConfig.from_namespace(args), no_encoder_attn=no_encoder_attn + ) + +class TransformerDecoderScriptable(TransformerDecoder): + def extract_features( + self, + prev_output_tokens, + 
encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + return x, None diff --git a/SpeechT5/SpeechUT/speechut/modules/transformer_encoder.py b/SpeechT5/SpeechUT/speechut/modules/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..f94e1fed8a005ec59d1e422157e08d88ff95bfda --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/modules/transformer_encoder.py @@ -0,0 +1,401 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import math +from typing import Dict, List, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqEncoder +from fairseq.modules import ( + FairseqDropout, + LayerDropModuleList, + LayerNorm, + SinusoidalPositionalEmbedding, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor +from fairseq.models.transformer import ( + TransformerConfig, +) + + +from speechut.modules import transformer_layer, LearnedPositionalEmbedding +from speechut.modules import RelativePositionalEncoding + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerEncoderBase": + return "TransformerEncoder" + else: + return module_name + + +class TransformerEncoderBase(FairseqEncoder): + """ + Transformer encoder consisting of *cfg.encoder.layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. 
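+    This variant additionally supports relative positional encoding
+    (*use_rel_pos_enc*) and a configurable attention scaling factor
+    (*scaling_for_att*), both of which are passed on to every encoder layer.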
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, cfg, dictionary, embed_tokens, use_rel_pos_enc=False, scaling_for_att=1.0): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.encoder_layerdrop = cfg.encoder.layerdrop + + embed_dim = embed_tokens.embedding_dim + self.padding_idx = embed_tokens.padding_idx + self.max_source_positions = cfg.max_source_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + self.embed_positions = ( + PositionalEmbedding( + cfg.max_source_positions, + embed_dim, + self.padding_idx, + learned=cfg.encoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + if self.encoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.encoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.use_rel_pos_enc = use_rel_pos_enc + self.scaling_for_att = scaling_for_att + self.layers.extend( + [self.build_encoder_layer(cfg) for i in range(cfg.encoder.layers)] + ) + self.num_layers = len(self.layers) + + if cfg.encoder.normalize_before: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(embed_dim // cfg.encoder.attention_heads, 160) + + def build_encoder_layer(self, cfg): + layer = transformer_layer.TransformerEncoderLayerBase(cfg, has_relative_attention_bias=self.use_rel_pos_enc, scaling_for_att=self.scaling_for_att) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward_embedding( + self, src_tokens, token_embedding: Optional[torch.Tensor] = None + ): + # embed tokens and positions + if token_embedding is None: + token_embedding = self.embed_tokens(src_tokens) + x = embed = self.embed_scale * token_embedding + if self.embed_positions is not None: + x = embed + self.embed_positions(src_tokens) + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + x = self.dropout_module(x) + if self.quant_noise is not None: + x = self.quant_noise(x) + return x, embed + + def forward( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + uniformity_layers: Optional[List[int]] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source 
sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. + """ + return self.forward_scriptable( + src_tokens, src_lengths, return_all_hiddens, token_embeddings, uniformity_layers + ) + + # TorchScript doesn't support super() method so that the scriptable Subclass + # can't access the base class model in Torchscript. + # Current workaround is to add a helper function with different name and + # call the helper function from scriptable Subclass. + def forward_scriptable( + self, + src_tokens, + src_lengths: Optional[torch.Tensor] = None, + return_all_hiddens: bool = False, + token_embeddings: Optional[torch.Tensor] = None, + uniformity_layers: Optional[List[int]] = None, + ): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + return_all_hiddens (bool, optional): also return all of the + intermediate hidden states (default: False). + token_embeddings (torch.Tensor, optional): precomputed embeddings + default `None` will recompute embeddings + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + - **encoder_embedding** (Tensor): the (scaled) embedding lookup + of shape `(batch, src_len, embed_dim)` + - **encoder_states** (List[Tensor]): all intermediate + hidden states of shape `(src_len, batch, embed_dim)`. + Only populated if *return_all_hiddens* is True. 
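+
+            For example, with a batch of 2 source sentences of length 7 and an
+            embedding size of 768, ``encoder_out[0]`` has shape ``(7, 2, 768)``
+            and ``encoder_padding_mask[0]`` has shape ``(2, 7)``.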
+ """ + # compute padding mask + encoder_padding_mask = src_tokens.eq(self.padding_idx) + has_pads = src_tokens.device.type == "xla" or encoder_padding_mask.any() + + x, encoder_embedding = self.forward_embedding(src_tokens, token_embeddings) + + # account for padding while computing the representation + if has_pads: + x = x * (1 - encoder_padding_mask.unsqueeze(-1).type_as(x)) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + encoder_states = [] + uniformity_hiddens = [] + + if return_all_hiddens: + encoder_states.append(x) + + if uniformity_layers is not None and 0 in uniformity_layers: + x = F.normalize(x.float(), dim=-1).type_as(x) + uniformity_hiddens.append(x) + + # encoder layers + for i, layer in enumerate(self.layers): + x = layer( + x, encoder_padding_mask=encoder_padding_mask if has_pads else None, + pos_bias=pos_k, + ) + if uniformity_layers is not None and i+1 in uniformity_layers: + x = F.normalize(x.float(), dim=-1).type_as(x) + uniformity_hiddens.append(x) + if return_all_hiddens: + assert encoder_states is not None + encoder_states.append(x) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # The Pytorch Mobile lite interpreter does not supports returning NamedTuple in + # `forward` so we use a dictionary instead. + # TorchScript does not support mixed values so the values are all lists. + # The empty list is equivalent to None. + src_lengths = ( + src_tokens.ne(self.padding_idx) + .sum(dim=1, dtype=torch.int32) + .reshape(-1, 1) + .contiguous() + ) + return { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [encoder_padding_mask], # B x T + "encoder_embedding": [encoder_embedding], # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "uniformity_hiddens": uniformity_hiddens, # List[T x B x C] + "src_tokens": [], + "src_lengths": [src_lengths], + } + + @torch.jit.export + def reorder_encoder_out(self, encoder_out: Dict[str, List[Tensor]], new_order): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if len(encoder_out["encoder_out"]) == 0: + new_encoder_out = [] + else: + new_encoder_out = [encoder_out["encoder_out"][0].index_select(1, new_order)] + if len(encoder_out["encoder_padding_mask"]) == 0: + new_encoder_padding_mask = [] + else: + new_encoder_padding_mask = [ + encoder_out["encoder_padding_mask"][0].index_select(0, new_order) + ] + if len(encoder_out["encoder_embedding"]) == 0: + new_encoder_embedding = [] + else: + new_encoder_embedding = [ + encoder_out["encoder_embedding"][0].index_select(0, new_order) + ] + + if len(encoder_out["src_tokens"]) == 0: + src_tokens = [] + else: + src_tokens = [(encoder_out["src_tokens"][0]).index_select(0, new_order)] + + if len(encoder_out["src_lengths"]) == 0: + src_lengths = [] + else: + src_lengths = [(encoder_out["src_lengths"][0]).index_select(0, new_order)] + + encoder_states = encoder_out["encoder_states"] + if len(encoder_states) > 0: + for idx, state in enumerate(encoder_states): + encoder_states[idx] = state.index_select(1, new_order) + + return { + "encoder_out": new_encoder_out, # T x B x C + "encoder_padding_mask": new_encoder_padding_mask, # B x T + "encoder_embedding": new_encoder_embedding, # B x T x C + "encoder_states": encoder_states, # List[T x B x C] + "src_tokens": src_tokens, # B x T + "src_lengths": src_lengths, # B x 1 + } + + def max_positions(self): + """Maximum input length supported by the encoder.""" + if self.embed_positions is None: + return self.max_source_positions + return min(self.max_source_positions, self.embed_positions.max_positions) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + print("deleting {0}".format(weights_key)) + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + for i in range(self.num_layers): + # update layer norms + self.layers[i].upgrade_state_dict_named( + state_dict, "{}.layers.{}".format(name, i) + ) + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + return state_dict + + +class TransformerEncoder(TransformerEncoderBase): + def __init__(self, args, dictionary, embed_tokens): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + use_rel_pos_enc=getattr(args, "use_rel_pos_enc", False), + scaling_for_att=getattr(args, "scaling_for_att", 1.0), + ) + + def build_encoder_layer(self, args): + return super().build_encoder_layer( + TransformerConfig.from_namespace(args), + ) + + +def PositionalEmbedding( + num_embeddings: int, + embedding_dim: int, + padding_idx: int, + learned: bool = False, +): + if learned: + # if padding_idx is specified then offset the embedding ids by + # this index and adjust num_embeddings appropriately + # TODO: The right place for this offset would be inside + # LearnedPositionalEmbedding. Move this there for a cleaner implementation. 
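+        # Illustrative note: fairseq's learned positional embeddings reserve
+        # ids 0..padding_idx and assign real positions the ids starting at
+        # padding_idx + 1, so the table needs num_embeddings + padding_idx + 1
+        # rows; e.g., num_embeddings=1024 with padding_idx=1 yields a table of
+        # 1026 position embeddings.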
+ if padding_idx is not None: + num_embeddings = num_embeddings + padding_idx + 1 + m = LearnedPositionalEmbedding(num_embeddings, embedding_dim, padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim**-0.5) + if padding_idx is not None: + nn.init.constant_(m.weight[padding_idx], 0) + else: + m = SinusoidalPositionalEmbedding( + embedding_dim, + padding_idx, + init_size=num_embeddings + padding_idx + 1, + ) + return m diff --git a/SpeechT5/SpeechUT/speechut/modules/transformer_layer.py b/SpeechT5/SpeechUT/speechut/modules/transformer_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..a71a848f1a5436756168aafd12d71637520b6b67 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/modules/transformer_layer.py @@ -0,0 +1,330 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/modules/transformer_layer.py + https://github.com/microsoft/SpeechT5/blob/main/Speech2C/speech2c/models/modules/transformer_decoder_layer.py +""" + +from typing import Dict, List, Optional + +import torch +from torch import Tensor +from fairseq.modules import LayerNorm +from fairseq.modules.transformer_layer import TransformerEncoderLayerBase as FairseqTransformerEncoderLayerBase +from fairseq.modules.transformer_layer import TransformerDecoderLayerBase as FairseqTransformerDecoderLayerBase + +from speechut.modules import MultiheadAttention + +class TransformerEncoderLayerBase(FairseqTransformerEncoderLayerBase): + """Encoder layer block. + + In the original paper each operation (multi-head attention or FFN) is + postprocessed with: `dropout -> add residual -> layernorm`. In the + tensor2tensor code they suggest that learning is more robust when + preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.encoder.normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + """ + + def __init__(self, cfg, has_relative_attention_bias=False, scaling_for_att=1.0): + self.scaling_for_att = scaling_for_att + super().__init__(cfg) + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim // cfg.encoder.attention_heads) + + def build_self_attention(self, embed_dim, cfg, scaling_for_att=1.0): + return MultiheadAttention( + embed_dim, + cfg.encoder.attention_heads, + dropout=cfg.attention_dropout, + self_attention=True, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + scaling_for_att=self.scaling_for_att, + ) + + def forward( + self, + x, + encoder_padding_mask: Optional[Tensor], + attn_mask: Optional[Tensor] = None, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, seq_len)` where padding elements are indicated by ``1``. + attn_mask (ByteTensor): binary tensor of shape `(tgt_len, src_len)`, + where `tgt_len` is the length of output and `src_len` is the + length of input, though here both are equal to `seq_len`. + `attn_mask[tgt_i, src_j] = 1` means that when calculating the + embedding for `tgt_i`, we exclude (mask out) `src_j`. 
This is + useful for strided self-attention. + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + # anything in original attn_mask = 1, becomes -1e8 + # anything in original attn_mask = 0, becomes 0 + # Note that we cannot use -inf here, because at some edge cases, + # the attention weight (before softmax) for some padded element in query + # will become -inf, which results in NaN in model parameters + if attn_mask is not None: + attn_mask = attn_mask.masked_fill( + attn_mask.to(torch.bool), -1e8 if x.dtype == torch.float32 else -1e4 + ) + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, _ = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=encoder_padding_mask, + need_weights=False, + attn_mask=attn_mask, + position_bias=pos_bias, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + x = self.fc2(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + return x + + + +class TransformerDecoderLayerBase(FairseqTransformerDecoderLayerBase): + """Decoder layer block. + + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.decoder.normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
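+        has_relative_attention_bias (bool, optional): apply a LayerNorm to the
+            relative position bias (*pos_bias*) used in self-attention
+            (default: False).
+        scaling_for_att (float, optional): scaling factor forwarded to the
+            attention modules (default: 1.0).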
+ """ + + def __init__( + self, cfg, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False, has_relative_attention_bias=False, scaling_for_att=1.0, + ): + self.scaling_for_att = scaling_for_att + super().__init__(cfg, + no_encoder_attn, + add_bias_kv, + add_zero_attn, + ) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim // cfg.decoder.attention_heads) + + def build_self_attention( + self, embed_dim, cfg, add_bias_kv=False, add_zero_attn=False + ): + return MultiheadAttention( + embed_dim, + cfg.decoder.attention_heads, + dropout=cfg.attention_dropout, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=not cfg.cross_self_attention, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + scaling_for_att=self.scaling_for_att, + ) + + def build_encoder_attention(self, embed_dim, cfg): + return MultiheadAttention( + embed_dim, + cfg.decoder.attention_heads, + kdim=cfg.encoder.embed_dim, + vdim=cfg.encoder.embed_dim, + dropout=cfg.attention_dropout, + encoder_decoder_attention=True, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + scaling_for_att=self.scaling_for_att, + ) + + def forward( + self, + x, + encoder_out: Optional[torch.Tensor] = None, + encoder_padding_mask: Optional[torch.Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + prev_self_attn_state: Optional[List[torch.Tensor]] = None, + prev_attn_state: Optional[List[torch.Tensor]] = None, + self_attn_mask: Optional[torch.Tensor] = None, + self_attn_padding_mask: Optional[torch.Tensor] = None, + need_attn: bool = False, + need_head_weights: bool = False, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor, optional): binary + ByteTensor of shape `(batch, src_len)` where padding + elements are indicated by ``1``. + need_attn (bool, optional): return attention weights + need_head_weights (bool, optional): return attention weights + for each head (default: return average over heads). 
+ + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if need_head_weights: + need_attn = True + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + if prev_self_attn_state is not None: + prev_key, prev_value = prev_self_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_self_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_self_attn_state[2] + assert incremental_state is not None + self.self_attn._set_input_buffer(incremental_state, saved_state) + _self_attn_input_buffer = self.self_attn._get_input_buffer(incremental_state) + if self.cross_self_attention and not ( + incremental_state is not None + and _self_attn_input_buffer is not None + and "prev_key" in _self_attn_input_buffer + ): + if self_attn_mask is not None: + assert encoder_out is not None + self_attn_mask = torch.cat( + (x.new_zeros(x.size(0), encoder_out.size(0)), self_attn_mask), dim=1 + ) + if self_attn_padding_mask is not None: + if encoder_padding_mask is None: + assert encoder_out is not None + encoder_padding_mask = self_attn_padding_mask.new_zeros( + encoder_out.size(1), encoder_out.size(0) + ) + self_attn_padding_mask = torch.cat( + (encoder_padding_mask, self_attn_padding_mask), dim=1 + ) + assert encoder_out is not None + y = torch.cat((encoder_out, x), dim=0) + else: + y = x + + x, attn = self.self_attn( + query=x, + key=y, + value=y, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + if self.c_attn is not None: + tgt_len, bsz = x.size(0), x.size(1) + x = x.view(tgt_len, bsz, self.nh, self.head_dim) + x = torch.einsum("tbhd,h->tbhd", x, self.c_attn) + x = x.reshape(tgt_len, bsz, self.embed_dim) + if self.attn_ln is not None: + x = self.attn_ln(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + if self.encoder_attn is not None and encoder_out is not None: + residual = x + if self.normalize_before: + x = self.encoder_attn_layer_norm(x) + if prev_attn_state is not None: + prev_key, prev_value = prev_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_attn_state[2] + assert incremental_state is not None + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=need_attn or (not self.training and self.need_attn), + need_head_weights=need_head_weights, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.encoder_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + if self.ffn_layernorm is not None: + x = self.ffn_layernorm(x) + x = self.fc2(x) + x = self.dropout_module(x) + if self.w_resid is not None: + residual = torch.mul(self.w_resid, residual) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + if self.onnx_trace and 
incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + assert saved_state is not None + if self_attn_padding_mask is not None: + self_attn_state = [ + saved_state["prev_key"], + saved_state["prev_value"], + saved_state["prev_key_padding_mask"], + ] + else: + self_attn_state = [saved_state["prev_key"], saved_state["prev_value"]] + return x, attn, self_attn_state + return x, attn, None + + def make_generation_fast_(self, need_attn: bool = False, **kwargs): + self.need_attn = need_attn diff --git a/SpeechT5/SpeechUT/speechut/modules/w2v_encoder.py b/SpeechT5/SpeechUT/speechut/modules/w2v_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..386f1eb0a4f4f67b552271e65c0b402d197e5bb2 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/modules/w2v_encoder.py @@ -0,0 +1,281 @@ +# -------------------------------------------------------- +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + wav2vec encoder adding relitive position bias, modified from + https://github.com/microsoft/SpeechT5/blob/main/Speech2C/speech2c/models/modules/transformer_encoder.py + https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/wav2vec/wav2vec2.py +""" + +import math +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.dataclass import ChoiceEnum +from fairseq.modules import ( + LayerNorm, + SamePad, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.transformer_sentence_encoder import init_bert_params +from fairseq.utils import index_put +from fairseq.distributed import fsdp_wrap +from fairseq.models.wav2vec.utils import pad_to_multiple + +## reload multi-head attition with rel-pos-bias +from fairseq.models.wav2vec.wav2vec2 import TransformerEncoder as W2vTransformerEncoder +from speechut.modules import RelativePositionalEncoding +from speechut.modules import MultiheadAttention + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) + + +class TransformerEncoder(W2vTransformerEncoder): + def __init__(self, args): + super().__init__(args) + + self.dropout = args.dropout + self.embedding_dim = args.encoder_embed_dim + self.required_seq_len_multiple = args.required_seq_len_multiple + self.use_rel_pos_enc = getattr(args, "use_rel_pos_enc", False) + + self.pos_conv = nn.Conv1d( + self.embedding_dim, + self.embedding_dim, + kernel_size=args.conv_pos, + padding=args.conv_pos // 2, + groups=args.conv_pos_groups, + ) + dropout = 0 + std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * self.embedding_dim)) + nn.init.normal_(self.pos_conv.weight, mean=0, std=std) + nn.init.constant_(self.pos_conv.bias, 0) + + self.pos_conv = nn.utils.weight_norm(self.pos_conv, name="weight", dim=2) + self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU()) + + layers = [] + for _ in range(args.encoder_layers): + layer = TransformerSentenceEncoderLayer( + embedding_dim=self.embedding_dim, + ffn_embedding_dim=args.encoder_ffn_embed_dim, + num_attention_heads=args.encoder_attention_heads, + dropout=self.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + 
layer_norm_first=args.layer_norm_first, + has_relative_attention_bias=self.use_rel_pos_enc, + ) + if args.checkpoint_activations: + layer = fsdp_wrap(layer) + layer = checkpoint_wrapper(layer) + layers.append(layer) + self.layers = nn.ModuleList(layers) + + self.layer_norm_first = args.layer_norm_first + self.layer_norm = LayerNorm(self.embedding_dim) + self.layerdrop = args.encoder_layerdrop + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(args.encoder_embed_dim // args.encoder_attention_heads, 160) + + + self.apply(init_bert_params) + + def forward(self, x, padding_mask=None, layer=None): + x, layer_results = self.extract_features(x, padding_mask, layer) + + if self.layer_norm_first and layer is None: + x = self.layer_norm(x) + + return x, layer_results + + def extract_features(self, x, padding_mask=None, tgt_layer=None): + + if padding_mask is not None: + x = index_put(x, padding_mask, 0) + + x_conv = self.pos_conv(x.transpose(1, 2)) + x_conv = x_conv.transpose(1, 2) + x = x + x_conv + + if not self.layer_norm_first: + x = self.layer_norm(x) + + # pad to the sequence length dimension + x, pad_length = pad_to_multiple( + x, self.required_seq_len_multiple, dim=-2, value=0 + ) + if pad_length > 0 and padding_mask is None: + padding_mask = x.new_zeros((x.size(0), x.size(1)), dtype=torch.bool) + padding_mask[:, -pad_length:] = True + else: + padding_mask, _ = pad_to_multiple( + padding_mask, self.required_seq_len_multiple, dim=-1, value=True + ) + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + if self.use_rel_pos_enc: + x_len = x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + layer_results = [] + r = None + for i, layer in enumerate(self.layers): + dropout_probability = np.random.random() + if not self.training or (dropout_probability > self.layerdrop): + x, z = layer(x, self_attn_padding_mask=padding_mask, need_weights=False, pos_bias=pos_k) + if tgt_layer is not None: + # unpad if needed + if pad_length > 0: + layer_results.append( + ( + x[:-pad_length], + z[:, :-pad_length, :-pad_length] + if z is not None + else z, + ) + ) + else: + layer_results.append((x, z)) + if i == tgt_layer: + r = x + break + + if r is not None: + x = r + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + # undo paddding + if pad_length > 0: + x = x[:, :-pad_length] + + return x, layer_results + + +class TransformerSentenceEncoderLayer(nn.Module): + """ + Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained + models. 
+ """ + + def __init__( + self, + embedding_dim: float = 768, + ffn_embedding_dim: float = 3072, + num_attention_heads: float = 8, + dropout: float = 0.1, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + activation_fn: str = "relu", + layer_norm_first: bool = False, + has_relative_attention_bias: bool = False, + ) -> None: + + super().__init__() + # Initialize parameters + self.embedding_dim = embedding_dim + self.dropout = dropout + self.activation_dropout = activation_dropout + + # Initialize blocks + self.activation_fn = utils.get_activation_fn(activation_fn) + self.self_attn = MultiheadAttention( + self.embedding_dim, + num_attention_heads, + dropout=attention_dropout, + self_attention=True, + ) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(self.activation_dropout) + self.dropout3 = nn.Dropout(dropout) + + self.layer_norm_first = layer_norm_first + + # layer norm associated with the self attention layer + self.self_attn_layer_norm = LayerNorm(self.embedding_dim) + self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim) + self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim) + + # layer norm associated with the position wise feed-forward NN + self.final_layer_norm = LayerNorm(self.embedding_dim) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embedding_dim//num_attention_heads) + + def forward( + self, + x: torch.Tensor, + self_attn_mask: torch.Tensor = None, + self_attn_padding_mask: torch.Tensor = None, + need_weights: bool = False, + att_args=None, + pos_bias=None, + ): + """ + LayerNorm is applied either before or after the self-attention/ffn + modules similar to the original Transformer imlementation. + """ + residual = x + + if self.layer_norm_first: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + x = self.dropout1(x) + x = residual + x + + residual = x + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + else: + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + position_bias=pos_bias, + ) + + x = self.dropout1(x) + x = residual + x + + x = self.self_attn_layer_norm(x) + + residual = x + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + x = self.final_layer_norm(x) + + return x, attn diff --git a/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/base_speechut_for_asr.sh b/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/base_speechut_for_asr.sh new file mode 100644 index 0000000000000000000000000000000000000000..d5bc7311331208c3f2f65c17586c73ee63cd98f0 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/base_speechut_for_asr.sh @@ -0,0 +1,40 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 2 ] && echo "Usage: $0 [mount=${PWD}] [world_size=32] [update_freq=1]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +mount=$3 +world_size=$4 +update_freq=$5 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/base_speechut4asr_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/pretrain \ + --config-name speechut_base_librispeech \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + dataset.train_subset=\"train_960+pseudo_libritext.kmu-ltr+merge_960.kmu-none\" \ + dataset.valid_subset=\"dev_clean+dev.kmu-ltr+dev.kmu-none\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1400000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=base_speechut4asr_${world_size}gpu_${update_freq}accum diff --git a/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/base_speechut_for_st.sh b/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/base_speechut_for_st.sh new file mode 100644 index 0000000000000000000000000000000000000000..438a43f55275938c51faefab181dacc1af3567d0 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/base_speechut_for_st.sh @@ -0,0 +1,47 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [mount=${PWD}] [world_size=32] [update_freq=1]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +lang=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/base_speechut4en${lang}_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/pretrain \ + --config-name speechut_base_librispeech \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + model.add_text_ctc=false \ + model.text_transformer.share_decoder_input_output_embed=true \ + criterion.u2t_ed_weight=1.0 \ + criterion.u2t_ctc_weight=0 \ + \ + dataset.train_subset=\"train_960,mustcuns_${lang}+pseudo_wmt_en${lang}.kmu-spm+train_960.kmu-none,mustcuns_${lang}.kmu-none\" \ + dataset.valid_subset=\"dev_clean+pseudo_valid.kmu-spm+dev.kmu-none\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1400000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=base_speechut4en${lang}_${world_size}gpu_${update_freq}accum + diff --git a/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/base_speechut_for_st_enfr.sh b/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/base_speechut_for_st_enfr.sh new file mode 100644 index 0000000000000000000000000000000000000000..c0c7217d0c124e603bb3b95ff11b7e7e462290c0 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/base_speechut_for_st_enfr.sh @@ -0,0 +1,48 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [lang=fr] [mount=${PWD}] [world_size=32] [update_freq=1]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +lang=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $lang ] && lang=fr +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=1 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/base_speechut4en${lang}_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/pretrain \ + --config-name speechut_base_librispeech \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + model.add_text_ctc=false \ + criterion.u2t_ed_weight=1.0 \ + criterion.u2t_ctc_weight=0 \ + \ + dataset.train_subset=\"train_960,pretrain_mustc+pseudo_wmt14_enfr.kmu-spm+train_960.kmu-none,pretrain_mustc.kmu-none\" \ + dataset.valid_subset=\"dev_clean+pseudo_valid.kmu-spm+dev.kmu-none\" \ + dataset.num_workers=0 \ + dataset.max_tokens=1400000 \ + optimization.max_update=600000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=base_speechut4en${lang}_${world_size}gpu_${update_freq}accum + diff --git a/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/large_speechut_for_asr.sh b/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/large_speechut_for_asr.sh new file mode 100644 index 0000000000000000000000000000000000000000..e9d64d789ed0421252edd71aa9c8268a42dc42f3 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/pretrain_speechut/large_speechut_for_asr.sh @@ -0,0 +1,41 @@ +# #################################### +# SpeechUT Large model # +# #################################### +[ $# -lt 2 ] && echo "Usage: $0 [mount=${PWD}] [world_size=32] [update_freq=4]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 +DATA_DIR=$1 +TEXT_DATA_DIR=$2 +mount=$3 +world_size=$4 +update_freq=$5 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=32 +[ -z $update_freq ] && update_freq=4 + +CODE_ROOT=${PWD} +MODEL_DIR="${mount}/exp/pretrain/large_speechut4asr_${world_size}gpu_${update_freq}accum" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/pretrain \ + --config-name speechut_large_librilight \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.labels='["km"]' \ + model.label_rate=50 \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + \ + dataset.train_subset=\"train_small+pseudo_libritext.kmu-ltr\" \ + dataset.valid_subset=\"dev_clean+dev.kmu-ltr\" \ + dataset.num_workers=0 \ + dataset.max_tokens=900000 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[${update_freq}] \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=large_speechut4asr_${world_size}gpu_${update_freq}accum + \ No newline at end of file diff --git a/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/finetune960h_large_edctc.sh b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/finetune960h_large_edctc.sh new file mode 100644 index 0000000000000000000000000000000000000000..08a25818bc9fc519e65fa175886545a8650c0906 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/finetune960h_large_edctc.sh @@ -0,0 +1,45 @@ +# #################################### +# SpeechUT Large model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=3]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 + +w2v_path=$1 +DATA_DIR=$2 +cpt=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=3 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="${mount}/exp/finetune_asr/$exp_name/960h_edctc80k_from_${cpt}_bz3.3m_lr1e-5" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/finetune_asr \ + --config-name speechut_large_960h \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + model.w2v_path=${w2v_path} \ + \ + optimization.lr=[0.00001] \ + optimization.max_update=80000 \ + dataset.max_tokens=1100000 \ + optimization.update_freq=[${update_freq}] \ + distributed_training.distributed_world_size=${world_size} \ + \ + dataset.train_subset="train_960" \ + dataset.valid_subset="dev_other" \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=960h_edctc80k_from_${cpt}_bz3.3m_lr1e-5 diff --git a/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/finetune_base_edctc.sh b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/finetune_base_edctc.sh new file mode 100644 index 0000000000000000000000000000000000000000..cad7bd0a11336a2b5e0c34372d57b7b4b953a414 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/finetune_base_edctc.sh @@ -0,0 +1,45 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=2]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" 
&& exit 1 + +w2v_path=$1 +DATA_DIR=$2 +cpt=$3 +mount=$4 +world_size=$5 +update_freq=$6 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=2 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="${mount}/exp/finetune_asr/$exp_name/edctc40k_from_${cpt}_bz2.6m_lr1e-5" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/speechut/config/finetune_asr \ + --config-name speechut_base_100h \ + common.user_dir=$CODE_ROOT/speechut \ + \ + task.data=$DATA_DIR \ + task.label_dir=$DATA_DIR \ + model.w2v_path=${w2v_path} \ + \ + optimization.lr=[0.00001] \ + optimization.max_update=40000 \ + dataset.max_tokens=1300000 \ + optimization.update_freq=[${update_freq}] \ + distributed_training.distributed_world_size=${world_size} \ + \ + dataset.train_subset="train_clean_100" \ + dataset.valid_subset="dev_other" \ + \ + common.tensorboard_logdir=$MODEL_DIR \ + checkpoint.save_dir=$MODEL_DIR \ + hydra.run.dir=$MODEL_DIR \ + hydra.job.name=edctc40k_from_${cpt}_bz2.6m_lr1e-5 diff --git a/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_edctc.sh b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_edctc.sh new file mode 100644 index 0000000000000000000000000000000000000000..9dce06398c476a26290839b7f3a8f8632a5060e0 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_edctc.sh @@ -0,0 +1,61 @@ +##################################### +# SpeechUT ASR model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_other] [beam_size=10] [ctc_weight=0.2] [--normalize]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +beam_size=$4 +ctc_weight=$5 +extra=$6 +[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..." +[ -z $gen_set ] && gen_set="dev_other" +[ -z $beam_size ] && beam_size=10 +[ -z $ctc_weight ] && ctc_weight=0.2 +[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 as no ctc-decoding used..." && beam_size=1 +[ $ctc_weight != 0 ] && extra="$extra --batch-size 1" + +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechut \ + --label-dir ${DATA_DIR} \ + --labels '["ltr"]' \ + --single-target \ + --post-process letter \ + --gen-subset ${subset} \ + --max-tokens 2000000 \ + \ + --task joint_sc2t_pretraining \ + --add-decoder-target \ + --fine-tuning \ + --pad-audio \ + --random-crop \ + \ + --ctc-weight ${ctc_weight} $extra \ + --beam ${beam_size} \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring wer --max-len-a 0.00078125 --max-len-b 200 \ + & +done +wait + + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank} + echo $results_path + tail -n 1 $results_path/generate-*.txt +done diff --git a/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_edctclm.sh b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_edctclm.sh new file mode 100644 index 0000000000000000000000000000000000000000..dadd1a4286de52cef0250640ef64fd4117e11ecb --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_edctclm.sh @@ -0,0 +1,66 @@ +##################################### +# SpeechUT ASR model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_other] [beam_size=30] [ctc_weight=0.3] [lm_weight=0.7] [lm_path] [--normalize]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +beam_size=$4 +ctc_weight=$5 +lm_weight=$6 +lm_path=$7 +extra=$8 +[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..." +[ -z $gen_set ] && gen_set="dev_other" +[ -z $beam_size ] && beam_size=30 +[ -z $ctc_weight ] && ctc_weight=0.3 +[ -z $lm_weight ] && lm_weight=0.7 +[ -z $lm_path ] && lm_path="/mnt/default/v-junyiao/librispeech/lm/lm_ctc_form/checkpoint_best.pt" +[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 and lm_weight to 0 as no ctc-decoding used..." && beam_size=1 && lm_weight=0 +[ $ctc_weight != 0 ] && extra="$extra --batch-size 1" + +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}/${subset}_${world_size}_${rank} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechut \ + --label-dir ${DATA_DIR} \ + --labels '["ltr"]' \ + --single-target \ + --post-process letter \ + --gen-subset ${subset} \ + --max-tokens 800000 \ + \ + --task joint_sc2t_pretraining \ + --add-decoder-target \ + --fine-tuning \ + --pad-audio \ + --random-crop \ + \ + --ctc-weight ${ctc_weight} $extra \ + --lm-weight ${lm_weight} --lm-path ${lm_path} \ + --beam ${beam_size} \ + \ + --path ${model_path} \ + --results-path ${results_path} \ + \ + --scoring wer --max-len-a 0.00078125 --max-len-b 200 \ + & +done +wait + + +for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}/${subset}_${world_size}_${rank} + echo $results_path + tail -n 1 $results_path/generate-*.txt +done diff --git a/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_lm_nj.sh b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_lm_nj.sh new file mode 100644 index 0000000000000000000000000000000000000000..a5627a59975a01736907a5cc3fb76df335709b43 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_lm_nj.sh @@ -0,0 +1,74 @@ +##################################### +# SpeechUT ASR model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_other] [beam_size=30] [ctc_weight=0.3] [lm_weight=0.7] [lm_path] [nj=8] [ngpu=8] [--normalize]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +beam_size=$4 +ctc_weight=$5 +lm_weight=$6 +lm_path=$7 +nj=$8 +ngpu=$9 +extra=${10} +[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..." +[ -z $gen_set ] && gen_set="dev_other" +[ -z $beam_size ] && beam_size=30 +[ -z $ctc_weight ] && ctc_weight=0.3 +[ -z $lm_weight ] && lm_weight=0.7 +[ -z $lm_path ] && lm_path="/mnt/default/v-junyiao/librispeech/lm/lm_ctc_form/checkpoint_best.pt" +[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 and lm_weight to 0 as no ctc-decoding used..." && beam_size=1 && lm_weight=0 +[ $ctc_weight != 0 ] && extra="$extra --batch-size 1" +[ -z $nj ] && nj=8 +[ -z $ngpu ] && ngpu=8 + +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +world_size=$nj +for rank in $(seq 0 $((nj - 1))); do + export CUDA_VISIBLE_DEVICES=$((rank % $ngpu)) + for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}/${subset}_${world_size}_${rank} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechut \ + --label-dir ${DATA_DIR} \ + --labels '["ltr"]' \ + --single-target \ + --post-process letter \ + --gen-subset ${subset} \ + --max-tokens 800000 \ + \ + --task joint_sc2t_pretraining \ + --add-decoder-target \ + --fine-tuning \ + --pad-audio \ + --random-crop \ + \ + --ctc-weight ${ctc_weight} $extra \ + --lm-weight ${lm_weight} --lm-path ${lm_path} \ + --beam ${beam_size} \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring wer --max-len-a 0.00078125 --max-len-b 200 \ + --distributed-world-size ${world_size} --distributed-rank ${rank} \ + & + done +done +wait + + +for subset in ${gen_set//,/ }; do + results_dir=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight} + cat $results_dir/${subset}_${world_size}_*/generate-${subset}.txt | grep -v "^Generate" > $results_dir/generate-${subset}.all.txt +done diff --git a/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_nj.sh b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_nj.sh new file mode 100644 index 0000000000000000000000000000000000000000..08e6df431c9856f24122118017b8ae85bacc5444 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_asr/inference_nj.sh @@ -0,0 +1,69 @@ +##################################### +# SpeechUT ASR model # +##################################### +[ $# -lt 2 ] && echo "Usage: $0 [gen-set=dev_other] [beam_size=10] [ctc_weight=0.2] [nj=32] [ngpu=8] [--normalize]" && exit 1 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +gen_set=$3 +beam_size=$4 +ctc_weight=$5 +nj=$6 +ngpu=$7 +extra=$8 +[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..." +[ -z $gen_set ] && gen_set="dev_other" +[ -z $beam_size ] && beam_size=10 +[ -z $ctc_weight ] && ctc_weight=0.2 +[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 as no ctc-decoding used..." && beam_size=1 +[ $ctc_weight != 0 ] && extra="$extra --batch-size 1" +[ -z $nj ] && nj=32 +[ -z $ngpu ] && ngpu=8 + +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} + +world_size=$nj +for rank in $(seq 0 $((nj - 1))); do + export CUDA_VISIBLE_DEVICES=$((rank % $ngpu)) + for subset in ${gen_set//,/ }; do + results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank} + [ ! 
-d $results_path ] && mkdir -p $results_path + + python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --user-dir $CODE_ROOT/speechut \ + --label-dir ${DATA_DIR} \ + --labels '["ltr"]' \ + --single-target \ + --post-process letter \ + --gen-subset ${subset} \ + --max-tokens 2000000 \ + \ + --task joint_sc2t_pretraining \ + --add-decoder-target \ + --fine-tuning \ + --pad-audio \ + --random-crop \ + \ + --ctc-weight ${ctc_weight} $extra \ + --beam ${beam_size} \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring wer --max-len-a 0.00078125 --max-len-b 200 \ + --distributed-world-size ${world_size} --distributed-rank ${rank} \ + & + done +done +wait + + +for subset in ${gen_set//,/ }; do + results_dir=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight} + cat $results_dir/${subset}_${world_size}_*/generate-${subset}.txt | grep -v "^Generate" > $results_dir/generate-${subset}.all.txt +done diff --git a/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_st/finetune_base_mustc_enxx.sh b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_st/finetune_base_mustc_enxx.sh new file mode 100644 index 0000000000000000000000000000000000000000..59c8a2a0346b708894b1568fa691c062537aa559 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_st/finetune_base_mustc_enxx.sh @@ -0,0 +1,77 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 4 ] && echo "Usage: $0 [mount=${PWD}] [world_size=8] [update_freq=4/6]" && exit 0 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +w2v_path=$1 +DATA_DIR=$2 +lang=$3 +cpt=$4 +mount=$5 +world_size=$6 +update_freq=$7 +[ -z $mount ] && mount=${PWD} +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=4 + +CODE_ROOT=${PWD} + +exp_name=${w2v_path%/*} +exp_name=${exp_name##*/} +MODEL_DIR="$mount/exp/finetune_mustc/$exp_name/legacy_en${lang}_from_${cpt}_bz3.2m_lr3e-5" +[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR + +max_tokens=800000 +python $CODE_ROOT/fairseq/fairseq_cli/train.py ${DATA_DIR} \ + --save-dir ${MODEL_DIR} \ + --user-dir $CODE_ROOT/speechut \ + --task speech_to_text \ + --config-yaml config_en${lang}.yaml \ + --train-subset "train_st" \ + --valid-subset "dev_st" \ + --fp16 \ + --seed 1 \ + \ + --ddp-backend no_c10d \ + --distributed-world-size ${world_size} \ + --tensorboard-logdir ${MODEL_DIR} \ + \ + --criterion label_smoothed_cross_entropy --report-accuracy \ + --label-smoothing 0.3 \ + \ + --optimizer adam \ + --clip-norm 1.0 \ + --lr 3e-05 \ + --lr-scheduler polynomial_decay --warmup-updates 5000 \ + --max-update 50000 \ + --total-num-update 50000 \ + --update-freq ${update_freq} \ + \ + --max-tokens ${max_tokens} \ + --max-sentences 16 \ + --max-tokens-valid ${max_tokens} \ + --grouped-shuffling \ + --max-source-positions ${max_tokens} \ + --skip-invalid-size-inputs-valid-test \ + --num-workers 0 \ + --best-checkpoint-metric "accuracy" \ + --maximize-best-checkpoint-metric \ + \ + --arch "speechut_st_legacy" \ + --w2v-path ${w2v_path} \ + --layerdrop 0.1 \ + --activation-dropout 0.1 \ + --attention-dropout 0.1 \ + --feature-grad-mult 1.0 \ + \ + --apply-mask --mask-prob 0.5 \ + \ + --log-format json \ + --log-interval 100 \ + --save-interval 1 \ + --keep-last-epochs 5 \ + --keep-best-checkpoints 5 \ + \ + 2>&1 | tee ${MODEL_DIR}/train_en${lang}.log + diff --git a/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_st/inference_st.sh 
b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_st/inference_st.sh new file mode 100644 index 0000000000000000000000000000000000000000..3aefa10e360f57dbf66cff9d84c800b4da89619f --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/scripts/tune_speechut_st/inference_st.sh @@ -0,0 +1,44 @@ +# #################################### +# SpeechUT Base model # +# #################################### +[ $# -lt 3 ] && echo "Usage: $0 [gen-set=dev] [beam_size=10] [lenpen=1.0]" && exit 0 +[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1 + +model_path=$1 +DATA_DIR=$2 +lang=$3 +gen_set=$4 +beam_size=$5 +lenpen=$6 +[ -z $gen_set ] && gen_set="dev" +[ -z $beam_size ] && beam_size=10 +[ -z $lenpen ] && lenpen=1 +src_dir=${model_path%/*} +cpt=${model_path##*/} +cpt=${cpt%.*} + +CODE_ROOT=${PWD} +results_path=$src_dir/decode_${cpt}_beam${beam_size}/${gen_set} +[ ! -d $results_path ] && mkdir -p $results_path + +python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \ + --gen-subset ${gen_set}_st \ + --max-tokens 2000000 \ + --max-source-positions 2000000 \ + --num-workers 0 \ + \ + --user-dir $CODE_ROOT/speechut \ + --task speech_to_text \ + --config-yaml config_en${lang}.yaml \ + \ + --path ${model_path} \ + --results-path $results_path \ + \ + --scoring sacrebleu --max-len-a 0 --max-len-b 512 \ + --beam ${beam_size} \ + --lenpen $lenpen \ + # --model-overrides "{'model':{'w2v_path':'/path/to/your/pretrained/model.pt'}}" \ + + echo $results_path + tail -n 1 $results_path/generate-*.txt + sleep 1s diff --git a/SpeechT5/SpeechUT/speechut/squence_generator.py b/SpeechT5/SpeechUT/speechut/squence_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..730e92768322473bb247471e657ec2cd02a48b0f --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/squence_generator.py @@ -0,0 +1,1118 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- +""" + Modified from fairseq/fairseq/sequence_generator.py + 1. add joint ctc decoding (merged from espnet) + 2. 
add lm dict conversion +""" +import math +from typing import Dict, List, Optional +import sys +import inspect + +import torch +import torch.nn as nn +from fairseq import search, utils +from fairseq.data import data_utils +from fairseq.models import FairseqIncrementalDecoder +from torch import Tensor +from fairseq.ngram_repeat_block import NGramRepeatBlock +import numpy + +from speechut.modules.ctc_prefix_score import CTCPrefixScore + +MAX_CTC_BEAM = 33 +CTC_SCORING_RATIO = 4 + +class SequenceGenerator(nn.Module): + def __init__( + self, + models, + tgt_dict, + beam_size=1, + max_len_a=0, + max_len_b=200, + max_len=0, + min_len=1, + normalize_scores=True, + len_penalty=1.0, + unk_penalty=0.0, + temperature=1.0, + match_source_len=False, + no_repeat_ngram_size=0, + search_strategy=None, + eos=None, + bos=None, + symbols_to_strip_from_output=None, + lm_model=None, + lm_weight=1.0, + ctc_weight=0.0, + lm_dict=None, + ): + """Generates translations of a given source sentence. + + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models, + currently support fairseq.models.TransformerModel for scripting + beam_size (int, optional): beam width (default: 1) + max_len_a/b (int, optional): generate sequences of maximum length + ax + b, where x is the source length + max_len (int, optional): the maximum length of the generated output + (not including end-of-sentence) + min_len (int, optional): the minimum length of the generated output + (not including end-of-sentence) + normalize_scores (bool, optional): normalize scores by the length + of the output (default: True) + len_penalty (float, optional): length penalty, where <1.0 favors + shorter, >1.0 favors longer sentences (default: 1.0) + unk_penalty (float, optional): unknown word penalty, where <0 + produces more unks, >0 produces fewer (default: 0.0) + temperature (float, optional): temperature, where values + >1.0 produce more uniform samples and values <1.0 produce + sharper samples (default: 1.0) + match_source_len (bool, optional): outputs should match the source + length (default: False) + """ + super().__init__() + if isinstance(models, EnsembleModel): + self.model = models + else: + self.model = EnsembleModel(models) + self.tgt_dict = tgt_dict + self.pad = tgt_dict.pad() + self.unk = tgt_dict.unk() + self.eos = tgt_dict.eos() if eos is None else eos + self.bos = self.eos if bos is None else bos + self.blank = self.tgt_dict.index("") + self.symbols_to_strip_from_output = ( + symbols_to_strip_from_output.union({self.eos, self.bos}) + if symbols_to_strip_from_output is not None + else {self.eos, self.bos} + ) + self.vocab_size = len(tgt_dict) + self.beam_size = beam_size + # the max beam size is the dictionary size - 1, since we never select pad + self.beam_size = min(beam_size, self.vocab_size - 1) + self.max_len_a = max_len_a + self.max_len_b = max_len_b + self.min_len = min_len + self.max_len = max_len or self.model.max_decoder_positions() + + self.normalize_scores = normalize_scores + self.len_penalty = len_penalty + self.unk_penalty = unk_penalty + self.temperature = temperature + self.match_source_len = match_source_len + + if no_repeat_ngram_size > 0: + self.repeat_ngram_blocker = NGramRepeatBlock(no_repeat_ngram_size) + else: + self.repeat_ngram_blocker = None + + assert temperature > 0, "--temperature must be greater than 0" + + self.search = ( + search.BeamSearch(tgt_dict) if search_strategy is None else search_strategy + ) + # We only need to set src_lengths in LengthConstrainedBeamSearch. 
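For intuition on the `len_penalty` and `normalize_scores` arguments documented above, here is a tiny worked example (toy log-probabilities, not repository code) of ranking a finished hypothesis by `sum(log p) / length ** len_penalty`:

```python
import math

def ranked_score(logprobs, len_penalty):
    # normalized sentence score used to sort finished hypotheses
    return sum(logprobs) / (len(logprobs) ** len_penalty)

short = [-0.5, -0.5, -0.5]   # 3 tokens, total -1.5
long_ = [-0.4] * 6           # 6 tokens, total -2.4

for lp in (0.5, 1.0, 1.5):
    print(lp, round(ranked_score(short, lp), 3), round(ranked_score(long_, lp), 3))
# len_penalty=0.5 -> short: -0.866, long: -0.980  (short hypothesis wins)
# len_penalty=1.0 -> short: -0.500, long: -0.400  (per-token average, long wins)
# len_penalty=1.5 -> short: -0.289, long: -0.163  (long wins by a larger margin)
```

This matches the docstring: values below 1.0 tilt the ranking toward shorter outputs, values above 1.0 toward longer ones.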
+ # As a module attribute, setting it would break in multithread + # settings when the model is shared. + self.should_set_src_lengths = ( + hasattr(self.search, "needs_src_lengths") and self.search.needs_src_lengths + ) + + self.model.eval() + + self.lm_model = lm_model + self.lm_weight = lm_weight + self.ctc_weight = ctc_weight + if self.lm_model is not None: + self.lm_model.eval() + + # assume lm and ed model use different dicts, but the same vovab, + # align the LM dict to the ED dict + self.lm_dict = lm_dict + if lm_dict is not None and not tgt_dict.symbols == lm_dict.symbols: + self.lm_vocab_size = len(lm_dict) + assert self.lm_vocab_size <= self.vocab_size + self.dict_transform_forward = {} + for sym in self.lm_dict.symbols: + if self.tgt_dict.index(sym) != self.lm_dict.index(sym): + self.dict_transform_forward[self.tgt_dict.index(sym)] = self.lm_dict.index(sym) + # [32, 33] + self.dict_transform_back = torch.zeros(self.lm_vocab_size, self.vocab_size).float() + for syb in self.lm_dict.symbols: + self.dict_transform_back[self.lm_dict.index(syb)][self.tgt_dict.index(syb)] = 1.0 + + + def dict_transform(self, tokens, forward=True): + if self.lm_dict is None: + return tokens + if forward: + assert tokens.dim() == 2 + t_tokens = tokens.clone() + offset = self.vocab_size + for idx in self.dict_transform_forward: + t_tokens[t_tokens == idx] = idx + offset + for idx in self.dict_transform_forward: + t_tokens[t_tokens == idx + offset] = self.dict_transform_forward[idx] + for idx in range(self.lm_vocab_size, self.vocab_size): + t_tokens[t_tokens == idx] = self.tgt_dict.pad() + return t_tokens + + dict_transform_back = self.dict_transform_back.type_as(tokens) + tokens = torch.matmul(tokens, dict_transform_back) + for i in range(self.lm_vocab_size, self.vocab_size): + tokens[:, i] = -10000000 + return tokens + + def cuda(self): + self.model.cuda() + return self + + @torch.no_grad() + def forward( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + """Generate a batch of translations. + + Args: + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, prefix_tokens, bos_token=bos_token) + + # TODO(myleott): unused, deprecate after pytorch-translate migration + def generate_batched_itr(self, data_itr, beam_size=None, cuda=False, timer=None): + """Iterate over a batched dataset and yield individual translations. 
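The LM-dictionary handling above assumes the language model and the encoder-decoder share the same symbols but may index them differently, so LM scores are mapped back into the target dictionary's order with a 0/1 matrix. A toy sketch of that mapping with made-up vocabularies (not the real dictionaries):

```python
import torch

tgt_symbols = ["<pad>", "</s>", "<unk>", "a", "b", "c"]
lm_symbols  = ["<pad>", "</s>", "<unk>", "b", "a", "c"]   # same symbols, different order

# back[i, j] = 1 if LM index i and target index j name the same symbol
back = torch.zeros(len(lm_symbols), len(tgt_symbols))
for i, sym in enumerate(lm_symbols):
    back[i, tgt_symbols.index(sym)] = 1.0

lm_scores = torch.tensor([[-9.0, -8.0, -7.0, -1.0, -2.0, -3.0]])  # ordered like lm_symbols
tgt_scores = lm_scores @ back                                      # ordered like tgt_symbols
print(tgt_scores)   # "a" now carries the LM's score for "a" (-2.0), "b" carries -1.0, etc.
```

The forward direction in the code above does the analogous thing on token ids before feeding the LM, remapping target-dictionary indices to LM-dictionary indices.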
+ Args: + cuda (bool, optional): use GPU for generation + timer (StopwatchMeter, optional): time generations + """ + for sample in data_itr: + s = utils.move_to_cuda(sample) if cuda else sample + if "net_input" not in s: + continue + input = s["net_input"] + # model.forward normally channels prev_output_tokens into the decoder + # separately, but SequenceGenerator directly calls model.encoder + encoder_input = { + k: v for k, v in input.items() if k != "prev_output_tokens" + } + if timer is not None: + timer.start() + with torch.no_grad(): + hypos = self.generate(encoder_input) + if timer is not None: + timer.stop(sum(len(h[0]["tokens"]) for h in hypos)) + for i, id in enumerate(s["id"].data): + # remove padding + src = utils.strip_pad(input["src_tokens"].data[i, :], self.pad) + ref = ( + utils.strip_pad(s["target"].data[i, :], self.pad) + if s["target"] is not None + else None + ) + yield id, src, ref, hypos[i] + + @torch.no_grad() + def generate( + self, models, sample: Dict[str, Dict[str, Tensor]], **kwargs + ) -> List[List[Dict[str, Tensor]]]: + """Generate translations. Match the api of other fairseq generators. + + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + constraints (torch.LongTensor, optional): force decoder to include + the list of constraints + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, **kwargs) + + def _generate( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + constraints: Optional[Tensor] = None, + bos_token: Optional[int] = None, + min_len_a: Optional[float] = None, + modal_idx: Optional[int] = -1, + ): + incremental_states = torch.jit.annotate( + List[Dict[str, Dict[str, Optional[Tensor]]]], + [ + torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {}) + for i in range(self.model.models_size) + ], + ) + net_input = sample["net_input"] + + if "src_tokens" in net_input: + src_tokens = net_input["src_tokens"] + # length of the source text being the character length except EndOfSentence and pad + src_lengths = ( + (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1) + ) + elif "source" in net_input: + src_tokens = net_input["source"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + elif "features" in net_input: + src_tokens = net_input["features"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + else: + raise Exception( + "expected src_tokens or source in net input. input keys: " + + str(net_input.keys()) + ) + + # bsz: total number of sentences in beam + # Note that src_tokens may have more than 2 dimensions (i.e. 
audio features) + bsz, src_len = src_tokens.size()[:2] + beam_size = self.beam_size + + if constraints is not None and not self.search.supports_constraints: + raise NotImplementedError( + "Target-side constraints were provided, but search method doesn't support them" + ) + + # Initialize constraints, when active + self.search.init_constraints(constraints, beam_size) + + max_len: int = -1 + if self.match_source_len: + max_len = src_lengths.max().item() + else: + max_len = min( + int(self.max_len_a * src_len + self.max_len_b), + self.max_len - 1, + ) + + min_len = self.min_len if min_len_a is None else int(min_len_a * src_len + 1) + assert ( + min_len <= max_len + ), "min_len cannot be larger than max_len, please adjust these!" + # compute the encoder output for each beam + with torch.autograd.profiler.record_function("EnsembleModel: forward_encoder"): + encoder_outs = self.model.forward_encoder(net_input) + + dec_sos = sample["lang_idx"] if ("lang_idx" in sample and sample["lang_idx"] is not None) else (self.bos if bos_token is None else bos_token) + # Get CTC lprobs and prep ctc_scorer + if self.ctc_weight > 0: + ctc_lprobs = self.model.models[0].get_normalized_probs( + encoder_outs[0], log_probs=True + ).contiguous().transpose(0, 1) # (B, T, C) from the encoder + + hyp = {} + ctc_prefix_score = CTCPrefixScore(ctc_lprobs[0].detach().cpu().numpy(), self.blank, self.eos, numpy) + hyp["ctc_state_prev"] = ctc_prefix_score.initial_state() + hyp["ctc_score_prev"] = 0.0 + ctc_beam = min(ctc_lprobs.shape[-1], int(min(beam_size * CTC_SCORING_RATIO, MAX_CTC_BEAM))) + ctc_hyps = {str(dec_sos): hyp} + + # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores + new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1) + new_order = new_order.to(src_tokens.device).long() + encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order) + # ensure encoder_outs is a List. + assert encoder_outs is not None + + # initialize buffers + scores = ( + torch.zeros(bsz * beam_size, max_len + 1).to(src_tokens).float() + ) # +1 for eos; pad is never chosen for scoring + tokens = ( + torch.zeros(bsz * beam_size, max_len + 2) + .to(src_tokens) + .long() + .fill_(self.pad) + ) # +2 for eos and pad + tokens[:, 0] = dec_sos + attn: Optional[Tensor] = None + + # A list that indicates candidates that should be ignored. + # For example, suppose we're sampling and have already finalized 2/5 + # samples. Then cands_to_ignore would mark 2 positions as being ignored, + # so that we only finalize the remaining 3 samples. 
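The maximum output length above is `min(max_len_a * src_len + max_len_b, max_decoder_positions - 1)`. As a purely illustrative calculation, using the `--max-len-a 0.00078125 --max-len-b 200` values passed by the decoding scripts, assuming the source is a raw 16 kHz waveform (so `src_len` counts samples) and a hypothetical decoder limit of 1024 positions:

```python
def max_output_len(src_len, max_len_a=0.00078125, max_len_b=200, max_decoder_positions=1024):
    # length budget for the generated hypothesis, before EOS is forced
    return min(int(max_len_a * src_len + max_len_b), max_decoder_positions - 1)

print(max_output_len(160_000))   # 325 tokens for a 10-second utterance (160k samples)
print(max_output_len(16_000))    # 212 tokens for a 1-second clip
```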
+ cands_to_ignore = ( + torch.zeros(bsz, beam_size).to(src_tokens).eq(-1) + ) # forward and backward-compatible False mask + + # list of completed sentences + finalized = torch.jit.annotate( + List[List[Dict[str, Tensor]]], + [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)], + ) # contains lists of dictionaries of infomation about the hypothesis being finalized at each step + + # a boolean array indicating if the sentence at the index is finished or not + finished = [False for i in range(bsz)] + num_remaining_sent = bsz # number of sentences remaining + + # number of candidate hypos per step + cand_size = 2 * beam_size # 2 x beam size in case half are EOS + + # offset arrays for converting between different indexing schemes + bbsz_offsets = ( + (torch.arange(0, bsz) * beam_size) + .unsqueeze(1) + .type_as(tokens) + .to(src_tokens.device) + ) + cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_tokens.device) + + reorder_state: Optional[Tensor] = None + batch_idxs: Optional[Tensor] = None + + original_batch_idxs: Optional[Tensor] = None + if "id" in sample and isinstance(sample["id"], Tensor): + original_batch_idxs = sample["id"] + else: + original_batch_idxs = torch.arange(0, bsz).type_as(tokens) + + for step in range(max_len + 1): # one extra step for EOS marker + # reorder decoder internal states based on the prev choice of beams + if reorder_state is not None: + if batch_idxs is not None: + # update beam indices to take into account removed sentences + corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as( + batch_idxs + ) + reorder_state.view(-1, beam_size).add_( + corr.unsqueeze(-1) * beam_size + ) + original_batch_idxs = original_batch_idxs[batch_idxs] + self.model.reorder_incremental_state(incremental_states, reorder_state) + encoder_outs = self.model.reorder_encoder_out( + encoder_outs, reorder_state + ) + with torch.autograd.profiler.record_function( + "EnsembleModel: forward_decoder" + ): + lprobs, avg_attn_scores = self.model.forward_decoder( + tokens[:, : step + 1], + encoder_outs, + incremental_states, + self.temperature, + modal_idx, + ) + + if self.ctc_weight > 0 and step != 0 and step < ctc_prefix_score.input_length: + new_lprobs = lprobs.new_full(lprobs.size(), -math.inf) + ctc_lprobs = lprobs.clone() + ctc_lprobs[:, self.blank] = -math.inf # never select blank + _, local_best_ids = torch.topk(ctc_lprobs, ctc_beam, dim=-1) + for b in range(tokens.size(0)): + hyp_key = " ".join(str(x) for x in tokens[b, : step + 1].tolist()) + ctc_scores, ctc_states = ctc_prefix_score( + tokens[b, : step + 1].cpu(), local_best_ids[b].cpu(), ctc_hyps[hyp_key]["ctc_state_prev"] + ) + new_lprobs[b, local_best_ids[b]] = (1 - self.ctc_weight) * (lprobs[b, local_best_ids[b]]) + self.ctc_weight * torch.from_numpy( + ctc_scores - ctc_hyps[hyp_key]["ctc_score_prev"] + ).to(device="cuda") + for j in range(len(local_best_ids[b])): + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())] = {} + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_score_prev"] = ctc_scores[j] + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_state_prev"] = ctc_states[j] + lprobs = new_lprobs + elif self.ctc_weight > 0 and step == 0: + new_lprobs = lprobs.new_full(lprobs.size(), -math.inf) + ctc_lprobs = lprobs.clone() + ctc_lprobs[:, self.blank] = -math.inf # never select blank + _, local_best_ids = torch.topk(ctc_lprobs, ctc_beam, dim=-1) + for b in range(tokens.size(0)): + hyp_key = " ".join(str(x) for x in tokens[b, : step + 1].tolist()) + 
ctc_scores, ctc_states = ctc_prefix_score( + tokens[b, : step + 1].cpu(), local_best_ids[b].cpu(), ctc_hyps[hyp_key]["ctc_state_prev"] + ) + new_lprobs[b, local_best_ids[b]] = (1 - self.ctc_weight) * (lprobs[b, local_best_ids[b]]) + self.ctc_weight * torch.from_numpy( + ctc_scores - ctc_hyps[hyp_key]["ctc_score_prev"] + ).to(device="cuda") + for j in range(len(local_best_ids[b])): + if b == 0: + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())] = {} + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_score_prev"] = ctc_scores[j] + ctc_hyps[hyp_key + " " + str(local_best_ids[b][j].item())]["ctc_state_prev"] = ctc_states[j] + lprobs = new_lprobs + if self.lm_model is not None and self.lm_weight != 0: + if self.lm_dict is not None: + transformed_tokens = self.dict_transform(tokens[:, : step + 1]) + lm_out = self.lm_model(transformed_tokens) + else: + lm_out = self.lm_model(tokens[:, : step + 1]) + probs = self.lm_model.get_normalized_probs( + lm_out, log_probs=True, sample=None + ) + probs = probs[:, -1, :] * self.lm_weight + if self.lm_dict is not None: + probs = self.dict_transform(probs, forward=False) + lprobs += probs + # handle prefix tokens (possibly with different lengths) + if ( + prefix_tokens is not None + and step < prefix_tokens.size(1) + and step < max_len + ): + lprobs, tokens, scores = self._prefix_tokens( + step, lprobs, scores, tokens, prefix_tokens, beam_size + ) + elif step < min_len: + # minimum length constraint (does not apply if using prefix_tokens) + lprobs[:, self.eos] = -math.inf + + lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs) + + lprobs[:, self.pad] = -math.inf # never select pad + lprobs[:, self.unk] -= self.unk_penalty # apply unk penalty + lprobs[:, self.blank] = -math.inf # never select blank + if dec_sos != self.eos: + lprobs[:, dec_sos] = -math.inf # never select lang id + + # handle max length constraint + if step >= max_len: + lprobs[:, : self.eos] = -math.inf + lprobs[:, self.eos + 1 :] = -math.inf + + # Record attention scores, only support avg_attn_scores is a Tensor + if avg_attn_scores is not None: + if attn is None: + attn = torch.empty( + bsz * beam_size, avg_attn_scores.size(1), max_len + 2 + ).to(scores) + attn[:, :, step + 1].copy_(avg_attn_scores) + + scores = scores.type_as(lprobs) + eos_bbsz_idx = torch.empty(0).to( + tokens + ) # indices of hypothesis ending with eos (finished sentences) + eos_scores = torch.empty(0).to( + scores + ) # scores of hypothesis ending with eos (finished sentences) + + if self.should_set_src_lengths: + self.search.set_src_lengths(src_lengths) + + if self.repeat_ngram_blocker is not None: + lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz, beam_size, step) + + # Shape: (batch, cand_size) + cand_scores, cand_indices, cand_beams = self.search.step( + step, + lprobs.view(bsz, -1, self.vocab_size), + scores.view(bsz, beam_size, -1)[:, :, :step], + tokens[:, : step + 1], + original_batch_idxs, + ) + + # cand_bbsz_idx contains beam indices for the top candidate + # hypotheses, with a range of values: [0, bsz*beam_size), + # and dimensions: [bsz, cand_size] + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + + # finalize hypotheses that end in eos + # Shape of eos_mask: (batch size, beam size) + eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf) + eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask) + + # only consider eos when it's among the top beam_size indices + # Now we know what beam item(s) to finish + # Shape: 1d list of absolute-numbered + 
eos_bbsz_idx = torch.masked_select( + cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents: List[int] = [] + if eos_bbsz_idx.numel() > 0: + eos_scores = torch.masked_select( + cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents = self.finalize_hypos( + step, + eos_bbsz_idx, + eos_scores, + tokens, + scores, + finalized, + finished, + beam_size, + attn, + src_lengths, + max_len, + ) + num_remaining_sent -= len(finalized_sents) + + assert num_remaining_sent >= 0 + if num_remaining_sent == 0: + break + if self.search.stop_on_max_len and step >= max_len: + break + assert step < max_len, f"{step} < {max_len}" + + # Remove finalized sentences (ones for which {beam_size} + # finished hypotheses have been generated) from the batch. + if len(finalized_sents) > 0: + new_bsz = bsz - len(finalized_sents) + + # construct batch_idxs which holds indices of batches to keep for the next pass + batch_mask = torch.ones( + bsz, dtype=torch.bool, device=cand_indices.device + ) + batch_mask[finalized_sents] = False + # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it + batch_idxs = torch.arange( + bsz, device=cand_indices.device + ).masked_select(batch_mask) + + # Choose the subset of the hypothesized constraints that will continue + self.search.prune_sentences(batch_idxs) + + eos_mask = eos_mask[batch_idxs] + cand_beams = cand_beams[batch_idxs] + bbsz_offsets.resize_(new_bsz, 1) + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + cand_scores = cand_scores[batch_idxs] + cand_indices = cand_indices[batch_idxs] + + if prefix_tokens is not None: + prefix_tokens = prefix_tokens[batch_idxs] + src_lengths = src_lengths[batch_idxs] + cands_to_ignore = cands_to_ignore[batch_idxs] + + scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + if attn is not None: + attn = attn.view(bsz, -1)[batch_idxs].view( + new_bsz * beam_size, attn.size(1), -1 + ) + bsz = new_bsz + else: + batch_idxs = None + + # Set active_mask so that values > cand_size indicate eos hypos + # and values < cand_size indicate candidate active hypos. + # After, the min values per row are the top candidate active hypos + + # Rewrite the operator since the element wise or is not supported in torchscript. + + eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size])) + active_mask = torch.add( + eos_mask.type_as(cand_offsets) * cand_size, + cand_offsets[: eos_mask.size(1)], + ) + + # get the top beam_size active hypotheses, which are just + # the hypos with the smallest values in active_mask. + # {active_hypos} indicates which {beam_size} hypotheses + # from the list of {2 * beam_size} candidates were + # selected. Shapes: (batch size, beam size) + new_cands_to_ignore, active_hypos = torch.topk( + active_mask, k=beam_size, dim=1, largest=False + ) + + # update cands_to_ignore to ignore any finalized hypos. + cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size] + # Make sure there is at least one active item for each sentence in the batch. + assert (~cands_to_ignore).any(dim=1).all() + + # update cands_to_ignore to ignore any finalized hypos + + # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam + # can be selected more than once). 
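The bookkeeping above over-generates `cand_size = 2 * beam_size` candidates per step so that, even if some of them end in EOS and are finalized, enough live hypotheses remain to keep the beam full (at most one candidate per beam can be EOS, so at least `beam_size` survive). A toy, self-contained sketch of that split with random scores (not repository code):

```python
import torch

torch.manual_seed(0)
beam_size, vocab, eos = 3, 8, 2
scores = torch.randn(beam_size, vocab).log_softmax(dim=-1)   # this step's log-probs per beam

top_scores, top_idx = scores.view(-1).topk(2 * beam_size)    # 2k best (beam, token) pairs
cand_beams = top_idx // vocab                                 # which beam each candidate extends
cand_tokens = top_idx % vocab                                 # which token it appends
eos_mask = cand_tokens.eq(eos)

finalized = [(b.item(), round(s.item(), 3)) for b, s, m in zip(cand_beams, top_scores, eos_mask) if m]
active = [(b.item(), t.item()) for b, t, m in zip(cand_beams, cand_tokens, eos_mask) if not m][:beam_size]
print("finalized:", finalized)   # candidates that ended in EOS this step
print("active   :", active)      # beam_size candidates carried into the next step
```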
+ active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos) + active_scores = torch.gather(cand_scores, dim=1, index=active_hypos) + + active_bbsz_idx = active_bbsz_idx.view(-1) + active_scores = active_scores.view(-1) + + # copy tokens and scores for active hypotheses + + # Set the tokens for each beam (can select the same row more than once) + tokens[:, : step + 1] = torch.index_select( + tokens[:, : step + 1], dim=0, index=active_bbsz_idx + ) + # Select the next token for each of them + tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather( + cand_indices, dim=1, index=active_hypos + ) + if step > 0: + scores[:, :step] = torch.index_select( + scores[:, :step], dim=0, index=active_bbsz_idx + ) + scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather( + cand_scores, dim=1, index=active_hypos + ) + + # Update constraints based on which candidates were selected for the next beam + self.search.update_constraints(active_hypos) + + # copy attention for active hypotheses + if attn is not None: + attn[:, :, : step + 2] = torch.index_select( + attn[:, :, : step + 2], dim=0, index=active_bbsz_idx + ) + + # reorder incremental state in decoder + reorder_state = active_bbsz_idx + + # sort by score descending + for sent in range(len(finalized)): + scores = torch.tensor( + [float(elem["score"].item()) for elem in finalized[sent]] + ) + _, sorted_scores_indices = torch.sort(scores, descending=True) + finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices] + finalized[sent] = torch.jit.annotate( + List[Dict[str, Tensor]], finalized[sent] + ) + return finalized + + def _prefix_tokens( + self, step: int, lprobs, scores, tokens, prefix_tokens, beam_size: int + ): + """Handle prefix tokens""" + prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat(1, beam_size).view(-1) + prefix_lprobs = lprobs.gather(-1, prefix_toks.unsqueeze(-1)) + prefix_mask = prefix_toks.ne(self.pad) + lprobs[prefix_mask] = torch.tensor(-math.inf).to(lprobs) + lprobs[prefix_mask] = lprobs[prefix_mask].scatter( + -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_lprobs[prefix_mask] + ) + # if prefix includes eos, then we should make sure tokens and + # scores are the same across all beams + eos_mask = prefix_toks.eq(self.eos) + if eos_mask.any(): + # validate that the first beam matches the prefix + first_beam = tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[ + :, 0, 1 : step + 1 + ] + eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0] + target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step] + assert (first_beam == target_prefix).all() + + # copy tokens, scores and lprobs from the first beam to all beams + tokens = self.replicate_first_beam(tokens, eos_mask_batch_dim, beam_size) + scores = self.replicate_first_beam(scores, eos_mask_batch_dim, beam_size) + lprobs = self.replicate_first_beam(lprobs, eos_mask_batch_dim, beam_size) + return lprobs, tokens, scores + + def replicate_first_beam(self, tensor, mask, beam_size: int): + tensor = tensor.view(-1, beam_size, tensor.size(-1)) + tensor[mask] = tensor[mask][:, :1, :] + return tensor.view(-1, tensor.size(-1)) + + def finalize_hypos( + self, + step: int, + bbsz_idx, + eos_scores, + tokens, + scores, + finalized: List[List[Dict[str, Tensor]]], + finished: List[bool], + beam_size: int, + attn: Optional[Tensor], + src_lengths, + max_len: int, + ): + """Finalize hypothesis, store finalized information in `finalized`, and change `finished` accordingly. 
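To make the `index_select`/`gather` bookkeeping above concrete, here is a toy re-parenting step with made-up token ids (not repository code): each surviving beam first copies the prefix of the old beam it continues from, then appends the token chosen for it this step.

```python
import torch

bsz, beam_size, step = 1, 3, 2
tokens = torch.tensor([[0, 5, 7, -1],      # flat (bsz * beam) rows; beam 0 so far: 5 7
                       [0, 5, 9, -1],      # beam 1 so far: 5 9
                       [0, 6, 4, -1]])     # beam 2 so far: 6 4

active_bbsz_idx = torch.tensor([0, 0, 1])  # new beams 0 and 1 continue old beam 0, new beam 2 continues old beam 1
next_tokens = torch.tensor([[3, 8, 2]])    # (bsz, beam_size) tokens selected this step

tokens[:, : step + 1] = torch.index_select(tokens[:, : step + 1], 0, active_bbsz_idx)
tokens.view(bsz, beam_size, -1)[:, :, step + 1] = next_tokens
print(tokens)
# -> [[0, 5, 7, 3], [0, 5, 7, 8], [0, 5, 9, 2]]
```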
+ A sentence is finalized when {beam_size} finished items have been collected for it. + + Returns number of sentences (not beam items) being finalized. + These will be removed from the batch and not processed further. + Args: + bbsz_idx (Tensor): + """ + assert bbsz_idx.numel() == eos_scores.numel() + + # clone relevant token and attention tensors. + # tokens is (batch * beam, max_len). So the index_select + # gets the newly EOS rows, then selects cols 1..{step + 2} + tokens_clone = tokens.index_select(0, bbsz_idx)[ + :, 1 : step + 2 + ] # skip the first index, which is EOS + + tokens_clone[:, step] = self.eos + attn_clone = ( + attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2] + if attn is not None + else None + ) + + # compute scores per token position + pos_scores = scores.index_select(0, bbsz_idx)[:, : step + 1] + pos_scores[:, step] = eos_scores + # convert from cumulative to per-position scores + pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] + + # normalize sentence-level scores + if self.normalize_scores: + eos_scores /= (step + 1) ** self.len_penalty + + # cum_unfin records which sentences in the batch are finished. + # It helps match indexing between (a) the original sentences + # in the batch and (b) the current, possibly-reduced set of + # sentences. + cum_unfin: List[int] = [] + prev = 0 + for f in finished: + if f: + prev += 1 + else: + cum_unfin.append(prev) + cum_fin_tensor = torch.tensor(cum_unfin, dtype=torch.int).to(bbsz_idx) + + unfin_idx = bbsz_idx // beam_size + sent = unfin_idx + torch.index_select(cum_fin_tensor, 0, unfin_idx) + + # Create a set of "{sent}{unfin_idx}", where + # "unfin_idx" is the index in the current (possibly reduced) + # list of sentences, and "sent" is the index in the original, + # unreduced batch + # For every finished beam item + # sentence index in the current (possibly reduced) batch + seen = (sent << 32) + unfin_idx + unique_seen: List[int] = torch.unique(seen).tolist() + + if self.match_source_len: + condition = step > torch.index_select(src_lengths, 0, unfin_idx) + eos_scores = torch.where(condition, torch.tensor(-math.inf), eos_scores) + sent_list: List[int] = sent.tolist() + for i in range(bbsz_idx.size()[0]): + # An input sentence (among those in a batch) is finished when + # beam_size hypotheses have been collected for it + if len(finalized[sent_list[i]]) < beam_size: + if attn_clone is not None: + # remove padding tokens from attn scores + hypo_attn = attn_clone[i] + else: + hypo_attn = torch.empty(0) + + finalized[sent_list[i]].append( + { + "tokens": tokens_clone[i], + "score": eos_scores[i], + "attention": hypo_attn, # src_len x tgt_len + "alignment": torch.empty(0), + "positional_scores": pos_scores[i], + } + ) + + newly_finished: List[int] = [] + for unique_s in unique_seen: + # check termination conditions for this sentence + unique_sent: int = unique_s >> 32 + unique_unfin_idx: int = unique_s - (unique_sent << 32) + + if not finished[unique_sent] and self.is_finished( + step, unique_unfin_idx, max_len, len(finalized[unique_sent]), beam_size + ): + finished[unique_sent] = True + newly_finished.append(unique_unfin_idx) + + return newly_finished + + def is_finished( + self, + step: int, + unfin_idx: int, + max_len: int, + finalized_sent_len: int, + beam_size: int, + ): + """ + Check whether decoding for a sentence is finished, which + occurs when the list of finalized sentences has reached the + beam size, or when we reach the maximum length. 
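`finalize_hypos` above stores running (cumulative) scores, recovers per-token scores by adjacent differences, and length-normalizes the sentence score. A small numeric sketch of the same arithmetic with toy values:

```python
import torch

cum = torch.tensor([[-0.2, -0.9, -1.5, -2.0]])   # cumulative scores after 4 generated tokens
pos = cum.clone()
pos[:, 1:] = pos[:, 1:] - pos[:, :-1]            # per-position scores
print(pos)                                       # tensor([[-0.2000, -0.7000, -0.6000, -0.5000]])

step, len_penalty = 3, 1.0
eos_score = cum[0, step] / ((step + 1) ** len_penalty)
print(eos_score)                                 # tensor(-0.5000), the normalized sentence score
```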
+ """ + assert finalized_sent_len <= beam_size + if finalized_sent_len == beam_size or step == max_len: + return True + return False + + +class EnsembleModel(nn.Module): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__() + self.models_size = len(models) + # method '__len__' is not supported in ModuleList for torch script + self.single_model = models[0] + self.models = nn.ModuleList(models) + + self.has_incremental: bool = False + if all( + hasattr(m, "decoder") and isinstance(m.decoder, FairseqIncrementalDecoder) + for m in models + ): + self.has_incremental = True + + def forward(self): + pass + + def has_encoder(self): + return hasattr(self.single_model, "encoder") + + def has_incremental_states(self): + return self.has_incremental + + def max_decoder_positions(self): + return min( + [ + m.max_decoder_positions() + for m in self.models + if hasattr(m, "max_decoder_positions") + ] + + [sys.maxsize] + ) + + @torch.jit.export + def forward_encoder(self, net_input: Dict[str, Tensor]): + if not self.has_encoder(): + return None + return [model.encoder.forward_torchscript(net_input) for model in self.models] + + @torch.jit.export + def forward_decoder( + self, + tokens, + encoder_outs: List[Dict[str, List[Tensor]]], + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + temperature: float = 1.0, + modal_idx: int = -1, + ): + log_probs = [] + avg_attn: Optional[Tensor] = None + encoder_out: Optional[Dict[str, List[Tensor]]] = None + for i, model in enumerate(self.models): + if self.has_encoder(): + encoder_out = encoder_outs[i] + # decode each model + if self.has_incremental_states(): + if "modal_idx" in inspect.getfullargspec(model.decoder.forward).args: + decoder_out = model.decoder.forward( + tokens, + encoder_out=encoder_out, + incremental_state=incremental_states[i], + modal_idx=modal_idx, + ) + else: + decoder_out = model.decoder.forward( + tokens, + encoder_out=encoder_out, + incremental_state=incremental_states[i], + ) + else: + if hasattr(model, "decoder"): + decoder_out = model.decoder.forward(tokens, encoder_out=encoder_out) + else: + decoder_out = model.forward(tokens) + + attn: Optional[Tensor] = None + decoder_len = len(decoder_out) + if decoder_len > 1 and decoder_out[1] is not None: + if isinstance(decoder_out[1], Tensor): + attn = decoder_out[1] + else: + attn_holder = decoder_out[1]["attn"] + if isinstance(attn_holder, Tensor): + attn = attn_holder + elif attn_holder is not None: + attn = attn_holder[0] + if attn is not None: + attn = attn[:, -1, :] + + decoder_out_tuple = ( + decoder_out[0][:, -1:, :].div_(temperature), + None if decoder_len <= 1 else decoder_out[1], + ) + probs = model.get_normalized_probs( + decoder_out_tuple, log_probs=True, sample=None + ) + probs = probs[:, -1, :] + if self.models_size == 1: + return probs, attn + + log_probs.append(probs) + if attn is not None: + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + + avg_probs = torch.logsumexp(torch.stack(log_probs, dim=0), dim=0) - math.log( + self.models_size + ) + + if avg_attn is not None: + avg_attn.div_(self.models_size) + return avg_probs, avg_attn + + @torch.jit.export + def reorder_encoder_out( + self, encoder_outs: Optional[List[Dict[str, List[Tensor]]]], new_order + ): + """ + Reorder encoder output according to *new_order*. 
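`forward_decoder` above averages the ensemble in probability space via `logsumexp(log_probs) - log(N)`. A two-model toy check (not repository code) that this equals the elementwise mean of the distributions:

```python
import math
import torch

log_probs = torch.log(torch.tensor([
    [0.7, 0.2, 0.1],      # model 1 distribution over 3 tokens
    [0.5, 0.4, 0.1],      # model 2 distribution
]))

avg = torch.logsumexp(log_probs, dim=0) - math.log(2)
print(avg.exp())          # ~tensor([0.6000, 0.3000, 0.1000]), the mean of the two rows
```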
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + new_outs: List[Dict[str, List[Tensor]]] = [] + if not self.has_encoder(): + return new_outs + for i, model in enumerate(self.models): + assert encoder_outs is not None + new_outs.append( + model.encoder.reorder_encoder_out(encoder_outs[i], new_order) + ) + return new_outs + + @torch.jit.export + def reorder_incremental_state( + self, + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + new_order, + ): + if not self.has_incremental_states(): + return + for i, model in enumerate(self.models): + model.decoder.reorder_incremental_state_scripting( + incremental_states[i], new_order + ) + + +class SequenceGeneratorWithAlignment(SequenceGenerator): + def __init__( + self, models, tgt_dict, left_pad_target=False, print_alignment="hard", **kwargs + ): + """Generates translations of a given source sentence. + + Produces alignments following "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + left_pad_target (bool, optional): Whether or not the + hypothesis should be left padded or not when they are + teacher forced for generating alignments. + """ + super().__init__(EnsembleModelWithAlignment(models), tgt_dict, **kwargs) + self.left_pad_target = left_pad_target + + if print_alignment == "hard": + self.extract_alignment = utils.extract_hard_alignment + elif print_alignment == "soft": + self.extract_alignment = utils.extract_soft_alignment + + @torch.no_grad() + def generate(self, models, sample, **kwargs): + finalized = super()._generate(sample, **kwargs) + + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + beam_size = self.beam_size + ( + src_tokens, + src_lengths, + prev_output_tokens, + tgt_tokens, + ) = self._prepare_batch_for_alignment(sample, finalized) + if any(getattr(m, "full_context_alignment", False) for m in self.model.models): + attn = self.model.forward_align(src_tokens, src_lengths, prev_output_tokens) + else: + attn = [ + finalized[i // beam_size][i % beam_size]["attention"].transpose(1, 0) + for i in range(bsz * beam_size) + ] + + if src_tokens.device != "cpu": + src_tokens = src_tokens.to("cpu") + tgt_tokens = tgt_tokens.to("cpu") + attn = [i.to("cpu") for i in attn] + + # Process the attn matrix to extract hard alignments. 
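+        # (Note, based on the constructor above) with print_alignment == "hard",
+        # extract_alignment is utils.extract_hard_alignment, which roughly aligns
+        # each target token to the source position receiving the most attention
+        # while skipping pad/eos; with "soft" the full per-token attention
+        # distribution is kept instead.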
+ for i in range(bsz * beam_size): + alignment = self.extract_alignment( + attn[i], src_tokens[i], tgt_tokens[i], self.pad, self.eos + ) + finalized[i // beam_size][i % beam_size]["alignment"] = alignment + return finalized + + def _prepare_batch_for_alignment(self, sample, hypothesis): + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + src_tokens = ( + src_tokens[:, None, :] + .expand(-1, self.beam_size, -1) + .contiguous() + .view(bsz * self.beam_size, -1) + ) + src_lengths = sample["net_input"]["src_lengths"] + src_lengths = ( + src_lengths[:, None] + .expand(-1, self.beam_size) + .contiguous() + .view(bsz * self.beam_size) + ) + prev_output_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=True, + ) + tgt_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=False, + ) + return src_tokens, src_lengths, prev_output_tokens, tgt_tokens + + +class EnsembleModelWithAlignment(EnsembleModel): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__(models) + + def forward_align(self, src_tokens, src_lengths, prev_output_tokens): + avg_attn = None + for model in self.models: + decoder_out = model(src_tokens, src_lengths, prev_output_tokens) + attn = decoder_out[1]["attn"][0] + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + if len(self.models) > 1: + avg_attn.div_(len(self.models)) + return avg_attn diff --git a/SpeechT5/SpeechUT/speechut/tasks/joint_sc2t_pretrain.py b/SpeechT5/SpeechUT/speechut/tasks/joint_sc2t_pretrain.py new file mode 100644 index 0000000000000000000000000000000000000000..db6e4e611f01d58f53ede5fd529fb9ceca44bcc8 --- /dev/null +++ b/SpeechT5/SpeechUT/speechut/tasks/joint_sc2t_pretrain.py @@ -0,0 +1,1004 @@ +# ---------------------------------------------------------------------------- +# SpeechUT: Bridging Speech and Text with Hidden-Unit for Encoder-Decoder Based Speech-Text Pre-training (https://arxiv.org/abs/2210.03730) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/SpeechUT +# Code based on fairseq: https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4 +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import os +import sys +from typing import Dict, List, Optional, Tuple +from pathlib import Path + +import numpy as np +from argparse import Namespace +from collections import OrderedDict + +import torch +from dataclasses import dataclass, field +from fairseq.data import ( + Dictionary, + encoders, + data_utils, + StripTokenDataset, + PrependTokenDataset, + AppendTokenDataset, + DenoisingDataset, + ConcatDataset, + FairseqDataset, + iterators, + ResamplingDataset, + MaskTokensDataset, + LanguagePairDataset, +) +from fairseq.data.audio.speech_to_text_joint_dataset import S2TJointDataConfig +from fairseq.data.shorten_dataset import maybe_shorten_dataset +# from fairseq.data.encoders.utils import get_whole_word_mask +from fairseq.dataclass.configs import FairseqDataclass +from fairseq.tasks import register_task +from fairseq.tasks.fairseq_task import FairseqTask +from fairseq.dataclass.constants import ChoiceEnum +from omegaconf import MISSING + +from 
speechut.data.multimodal_corpus_dataset import MultiCorpusDataset +from speechut.data.load_langpair_dataset import load_langpair_dataset +from speechut.data.language_trible_dataset import LanguageTripleDataset, load_langtriple_dataset +from speechut.data.hubert_dataset import HubertDataset + +logger = logging.getLogger(__name__) + +TOKENIZER_CHOICES = ChoiceEnum(["sentencepiece", "hubert_letters", "none"]) + +def _lang_token(lang: str): + return "".format(lang) + +def _lang_token_index(dic: Dictionary, lang: str): + """Return language token index.""" + idx = dic.index(_lang_token(lang)) + assert idx != dic.unk_index, "cannot find language token for lang {}".format(lang) + return idx + + +class LabelEncoder(object): + def __init__(self, dictionary: Dictionary) -> None: + self.dictionary = dictionary + + def __call__(self, label: str) -> List[str]: + return self.dictionary.encode_line( + label, append_eos=False, add_if_not_exist=False, + ) + + +### wrap the initial get_whole_word_mask which needs bpe_tokenizer, +### here we just assume words are splited by "|" or "" +def get_whole_word_mask(args, dictionary): + def is_beginning_of_word(i): + if i < dictionary.nspecial: + # special elements are always considered beginnings + return True + tok = dictionary[i] + if tok.startswith("madeupword"): + return True + elif tok in ["", "", "", "", "|", ""]: + return True + else: + return False + + mask_whole_words = torch.ByteTensor( + list(map(is_beginning_of_word, range(len(dictionary)))) + ) + return mask_whole_words + +def get_repeative_start(tokens): + """ + tokens: torch.Tensor with repeative tokens + """ + length = len(tokens) + rep_start_id = tokens[:-1] != tokens[1:] + return torch.cat([torch.tensor([True]), rep_start_id]) + +@dataclass +class TextPretrainingConfig(FairseqDataclass): + ### added for joint pretraining + text_data: Optional[str] = field( + default=None, + metadata={ + "help": "if set, path to text data directory", + }, + ) + seed: Optional[int] = field( + default=1, + metadata={ + "help": "for ordered_indices in MulticorpusDataset", + }, + ) + tokens_per_sample: Optional[int] = field( + default=512, + metadata={ + "help": "max number of total tokens over all segments per sample for dataset", + }, + ) + tokens_per_sample_tgt: Optional[int] = field( + default=512, + metadata={ + "help": "max number of total tokens over all segments per target sample for dataset", + }, + ) + sample_break_mode: Optional[str] = field( + default="eos", + metadata={ + "help": "mode for breaking sentence", + }, + ) + mask: Optional[float] = field( + default=0.3, + metadata={ + "help": "fraction of words/subwords that will be masked", + }, + ) + leave_unmasked_prob: float = field( + default=0.1, + metadata={"help": "probability that a masked token is unmasked"}, + ) + mask_random: Optional[float] = field( + default=0.1, + metadata={ + "help": "instead of using [MASK], use random token this often", + }, + ) + freq_weighted_replacement: bool = field( + default=False, + metadata={"help": "sample random replacement words based on word frequencies"}, + ) + mask_whole_words: bool = field( + default=True, + metadata={"help": "mask whole words; you may also want to set --bpe"}, + ) + mask_repeative_tokens: bool = field( + default=True, + metadata={"help": "mask repeative_tokens; if mask_whole_words=False"}, + ) + mask_multiple_length: int = field( + default=1, + metadata={"help": "repeat the mask indices multiple times"}, + ) + mask_stdev: float = field( + default=0.0, + metadata={"help": "stdev of the mask 
length"}, + ) + shorten_method: Optional[str] = field( + default="none", + metadata={ + "help": "if not none, shorten sequences that exceed tokens_per_sample", + "choices": "none/truncate/random_crop" + }, + ) + shorten_data_split_list: Optional[str] = field( + default="", + metadata={ + "help": "comma_separated list of dataset splits to apply shortening to, e.g., train,valid (default: all dataset splits)", + }, + ) + + ### below hypra-parameters is used in bart + insert: Optional[float] = field( + default=0.0, + metadata={ + "help": "insert this percentage of additional random tokens", + }, + ) + permute: Optional[float] = field( + default=0.0, + metadata={ + "help": "take this proportion of subwords and permute them", + }, + ) + rotate: Optional[float] = field( + default=0.0, + metadata={ + "help": "rotate this proportion of inputs", + }, + ) + poisson_lambda: Optional[float] = field( + default=3.5, + metadata={ + "help": "randomly shuffle sentences for this proportion of inputs", + }, + ) + permute_sentences: Optional[float] = field( + default=0.0, + metadata={ + "help": "shuffle this proportion of sentences in all inputs", + }, + ) + mask_length: Optional[str] = field( + default="span-poisson", + metadata={ + "help": "mask length to choose", + "choice": "subword/word/span-poisson" + }, + ) + replace_length: Optional[int] = field( + default=1, + metadata={ + "help": "when masking N tokens, replace with 0, 1, or N tokens (use -1 for N)", + }, + ) + shuffle_instance: Optional[bool] = field( + default=False, + metadata={"help": "shuffle instance"}, + ) + max_source_positions: Optional[int] = field( + default=1024, + metadata={"help": "max number of tokens in the source sequence"}, + ) + max_target_positions: Optional[int] = field( + default=1024, + metadata={"help": "max number of tokens in the target sequence"}, + ) + bpe: Optional[str] = field( + default="", + metadata={ + "help": "will wrapped by the text_data_config yaml", + }, + ) + data_config: Optional[str] = field( + default=None, + metadata={ + "help": "a config yaml specify the bpe model of text data", + }, + ) + text_maxtokens_ratio: Optional[float] = field( + default=1.0, + metadata={ + "help": "for text, max_tokens = max_tokens * text_maxtokens_ratio / 320 ", + }, + ) + prepend_tgt_lang_tag: bool = field( + default=False, + metadata={"help": "prepend tgt_lang_tag to replace "}, + ) + mask_text_ratio: Optional[float] = field( + default=0.0, + metadata={ + "help": "mask_text_ratio, for paired data", + }, + ) + truncate_mono_source: bool = field( + default=True, + metadata={"help": "truncate mono source-side examples that exceed max-positions"}, + ) + + +@dataclass +class JointPretrainingConfig(FairseqDataclass): + data: str = field( + default=MISSING, metadata={"help": "path to speech data directory"} + ) + fine_tuning: bool = field( + default=False, metadata={"help": "set to true if fine-tuning Hubert"} + ) + labels: List[str] = field( + default_factory=lambda: ["ltr"], + metadata={ + "help": ( + "extension of the label files to load, frame-level labels for" + " pre-training, and sequence-level label for fine-tuning" + ) + }, + ) + label_dir: Optional[str] = field( + default=None, + metadata={ + "help": "if set, looks for labels in this directory instead", + }, + ) + label_rate: int = field( + default=-1, + metadata={"help": "label frame rate. -1 for sequence label"}, + ) + sample_rate: int = field( + default=16_000, + metadata={ + "help": "target sample rate. 
audio files will be up/down " + "sampled to this rate" + }, + ) + normalize: bool = field( + default=False, + metadata={ + "help": "if set, normalizes input to have 0 mean and unit variance" + }, + ) + enable_padding: bool = field( + default=False, + metadata={"help": "pad shorter samples instead of cropping"}, + ) + max_keep_size: Optional[int] = field( + default=None, + metadata={"help": "exclude sample longer than this"}, + ) + max_sample_size: Optional[int] = field( + default=None, + metadata={"help": "max sample size to crop to for batching"}, + ) + min_sample_size: Optional[int] = field( + default=None, + metadata={"help": "min sample size to crop to for batching"}, + ) + single_target: Optional[bool] = field( + default=False, + metadata={ + "help": "if set, AddTargetDatasets outputs same keys " + "as AddTargetDataset" + }, + ) + random_crop: Optional[bool] = field( + default=True, + metadata={"help": "always crop from the beginning if false"}, + ) + pad_audio: Optional[bool] = field( + default=False, + metadata={"help": "pad audio to the longest one in the batch if true"}, + ) + store_labels: Optional[bool] = field( + default=True, + metadata={"help": "store spm labels in memory, should be true when fine-tune with bpe"}, + ) + add_decoder_target: bool = field( + default=False, + metadata={"help": "contral the model architecture, if set True, load reduced unit as target"}, + ) + split_modality_batch: bool = field( + default=False, + metadata={"help": "whether create all samples of different modalities in a batch"}, + ) + speech_tgt_lang: str = field( + default="", + metadata={"help": "prepend to prev_output_tokens to replace , only used for decoder"}, + ) + speech_sampling_alpha: float = field( + default=0.2, + metadata={ + "help": "Hyper-parameter alpha = 1/T for temperature-based speech resampling." + "(alpha = 1 for no resampling)" + }, + ) + text_sampling_alpha: float = field( + default=0.2, + metadata={ + "help": "Hyper-parameter alpha = 1/T for temperature-based text resampling." 
+ "(alpha = 1 for no resampling)" + }, + ) + hubert_tokenizer: Optional[TOKENIZER_CHOICES] = field( + default="none", + metadata={"help": "which tokenizer for processing text"}, + ) + sp_path: Optional[str] = field( + default=None, + metadata={"help": "sentencepiece model path if using bpe tokenizer"}, + ) + text_cfg: TextPretrainingConfig = TextPretrainingConfig() + # For inference + ctc_weight: float = field( + default=0.0, + metadata={"help": "ctc weight during inference"}, + ) + lm_dict: Optional[str] = field( + default="dict.txt", + metadata={"help": "dict used for decoding with language model, should be in cfg.data/"}, + ) + +@register_task("joint_sc2t_pretraining", dataclass=JointPretrainingConfig) +class Jsc2tPretrainingTask(FairseqTask): + + cfg: JointPretrainingConfig + + def __init__( + self, + cfg: JointPretrainingConfig, + load_local_states: True, + ) -> None: + super().__init__(cfg) + + logger.info(f"current directory is {os.getcwd()}") + logger.info(f"JSTPretrainingTask Config {cfg}") + + self.cfg = cfg + self.fine_tuning = cfg.fine_tuning + self.blank_symbol = "" + + if load_local_states: + self.state.add_factory("hubert_tokenizer", self.build_tokenizer) + if self.cfg.text_cfg.text_data is not None and os.path.exists(self.cfg.text_cfg.text_data): + self.state.add_factory("text_dictionary", self.load_text_dictionary) + self.state.add_factory("text_src_dictionary", self.load_text_src_dictionary) + if cfg.fine_tuning: + self.state.add_factory("target_dictionary", self.load_dictionaries) + else: + self.state.add_factory("dictionaries", self.load_dictionaries) + + if cfg.text_cfg.data_config is not None: + self.text_data_cfg = S2TJointDataConfig(Path(f"{cfg.text_cfg.text_data}/{cfg.text_cfg.data_config}")) + self.cfg.text_cfg.bpe = self.text_data_cfg.bpe_tokenizer["bpe"] + else: + self.text_data_cfg = None + + @property + def source_dictionary(self) -> Optional[Dictionary]: + return None + + @property + def target_dictionary(self) -> Optional[Dictionary]: + return self.state.target_dictionary + + @property + def dictionaries(self) -> List[Dictionary]: + return self.state.dictionaries + + @property + def text_dictionary(self) -> Optional[Dictionary]: + return self.state.text_dictionary + + @property + def text_src_dictionary(self) -> Optional[Dictionary]: + return self.state.text_src_dictionary + + @property + def hubert_tokenizer(self): + return self.state.hubert_tokenizer + + def load_dictionaries(self): + label_dir = self.cfg.data if self.cfg.label_dir is None else self.cfg.label_dir + dictionaries = [Dictionary.load(f"{label_dir}/dict.{label}.txt") for label in self.cfg.labels] + if not self.cfg.fine_tuning: + for dictionary in dictionaries: + dictionary.add_symbol("") + return dictionaries[0] if self.cfg.fine_tuning else dictionaries + + def load_text_dictionary(self): + tgt_dict_path = f"{self.cfg.text_cfg.text_data}/{self.text_data_cfg.vocab_filename if self.text_data_cfg is not None else 'dict.txt'}" + if not os.path.isfile(tgt_dict_path): + raise FileNotFoundError(f"Dict not found: {tgt_dict_path}") + text_dictionary = Dictionary.load(tgt_dict_path) + self.mask_idx = text_dictionary.add_symbol("") + return text_dictionary + + def load_text_src_dictionary(self): + src_dict_path = f"{self.cfg.text_cfg.text_data}/{self.text_data_cfg.src_vocab_filename if self.text_data_cfg is not None else 'dict.txt'}" + if not os.path.isfile(src_dict_path): + raise FileNotFoundError(f"Dict not found: {src_dict_path}") + src_text_dictionary = Dictionary.load(src_dict_path) + self.mask_idx = 
src_text_dictionary.add_symbol("") + return src_text_dictionary + + @classmethod + def setup_task( + cls, cfg: JointPretrainingConfig, **kwargs + ) -> "Jsc2tPretrainingTask": + load_local_states = kwargs.get("load_local_states", True) + return cls(cfg, load_local_states) + + def get_label_dir(self) -> str: + if self.cfg.label_dir is None: + return self.cfg.data + return self.cfg.label_dir + + def load_paired_dataset(self, text_split, truncate_source=False): + text_split, lp = text_split.rsplit('.', 1) # e.g. "libritext.ltr-ltr" + if len(lp.split("-")) == 2: + src, tgt = lp.split("-") + if src == tgt: + logger.warn(f"| trying to load monolingual dataset {text_split}.{lp}, please check your task is right.") + paired_dataset = self.load_char_bart_dataset(f"{text_split}.{lp}.{tgt}") + return paired_dataset + paired_dataset = load_langpair_dataset( + self.cfg.text_cfg.text_data, + text_split, + src, + self.text_src_dictionary, + tgt, + self.text_dictionary, + combine=True, + dataset_impl=None, + upsample_primary=1, + left_pad_source=False, + left_pad_target=False, + max_source_positions=self.cfg.text_cfg.tokens_per_sample, + max_target_positions=self.cfg.text_cfg.tokens_per_sample, + truncate_source=truncate_source, + prepend_bos=False, + load_alignments=False, + append_source_id=True if self.cfg.text_cfg.prepend_tgt_lang_tag else False, + lang_format="" if self.cfg.text_cfg.prepend_tgt_lang_tag else "[{}]", + input_feeding=self.cfg.add_decoder_target, + ) + if self.cfg.text_cfg.mask_text_ratio > 0: + # add mask + self.mask_idx = self.text_src_dictionary.index("") + mask_whole_words = None + if self.cfg.text_cfg.mask_whole_words: + mask_whole_words = get_whole_word_mask(self.cfg.text_cfg, self.text_src_dictionary) + elif self.cfg.text_cfg.mask_repeative_tokens: + mask_whole_words = get_repeative_start + + src_dataset, src_unmasked_dataset = MaskTokensDataset.apply_mask( + paired_dataset.src, + self.text_src_dictionary, + pad_idx=self.text_src_dictionary.pad(), + mask_idx=self.mask_idx, + seed=self.cfg.text_cfg.seed, + mask_prob=self.cfg.text_cfg.mask_text_ratio, + leave_unmasked_prob=self.cfg.text_cfg.leave_unmasked_prob, + random_token_prob=self.cfg.text_cfg.mask_random, + freq_weighted_replacement=self.cfg.text_cfg.freq_weighted_replacement, + mask_whole_words=mask_whole_words, + mask_multiple_length=self.cfg.text_cfg.mask_multiple_length, + mask_stdev=self.cfg.text_cfg.mask_stdev, + ) + tgt_dataset = paired_dataset.tgt if paired_dataset.tgt is not None else src_unmasked_dataset + paired_dataset = LanguageTripleDataset( + src_dataset, + src_dataset.sizes, + self.text_src_dictionary, + src_unmasked_dataset, + src_unmasked_dataset.sizes, + self.text_src_dictionary, + tgt_dataset, + tgt_dataset.sizes, + self.text_dictionary, + left_pad_source=False, + left_pad_target=False, + align_dataset=None, + eos=None, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + ) + else: + src, ref, tgt = lp.split("-") + paired_dataset = load_langtriple_dataset( + self.cfg.text_cfg.text_data, + text_split, + src, + self.text_src_dictionary, + ref, + self.dictionaries[-1], + tgt, + self.text_dictionary, + combine=True, + dataset_impl=None, + upsample_primary=1, + left_pad_source=False, + left_pad_target=False, + max_source_positions=self.cfg.text_cfg.tokens_per_sample, + max_target_positions=self.cfg.text_cfg.tokens_per_sample, + truncate_source=truncate_source, + prepend_bos=False, + load_alignments=False, + append_source_id=True if self.cfg.text_cfg.prepend_tgt_lang_tag else False, + lang_format="" if 
self.cfg.text_cfg.prepend_tgt_lang_tag else "[{}]", + ) + return paired_dataset + + def load_dataset(self, split: str, epoch=1, **kwargs) -> None: + """ + Create Wav dataset for audio, and Index dataset for phonemized text, + then concatenate them to by fairseq.data.multi_corpus_dataset.MultiCorpusDataset. + """ + speech_splits = split.split('+')[0].split(',') + ### 1st, create a speech dataset using STSpeechDataset (modified from HubertDataset) + dicts = [self.target_dictionary] if self.cfg.fine_tuning else self.dictionaries + pad_list = [dict.pad() for dict in dicts] + eos_list = [dict.eos() for dict in dicts] + procs = [LabelEncoder(dict) for dict in dicts] + if self.cfg.speech_tgt_lang != "": + tgt_lang_idx = _lang_token_index(dicts[0], self.cfg.speech_tgt_lang) + logger.info(f"Will prepend <{tgt_lang_idx}> at the beginning of prev_output_tokens to replace ") + else: + tgt_lang_idx = None + + + # hubert v1: pad_audio=True, random_crop=False; + speech_datasets = [] + for speech_split in speech_splits: + paths = [ + f"{self.get_label_dir()}/{speech_split}.{l}" for l in self.cfg.labels + ] + speech_datasets.append( + HubertDataset( + f"{self.cfg.data}/{speech_split}.tsv", + sample_rate=self.cfg.sample_rate, + label_paths=paths, + label_rates=self.cfg.label_rate, + pad_list=pad_list, + eos_list=eos_list, + label_processors=procs, + max_keep_sample_size=self.cfg.max_keep_size, + min_keep_sample_size=self.cfg.min_sample_size, + max_sample_size=self.cfg.max_sample_size, + pad_audio=self.cfg.pad_audio, + normalize=self.cfg.normalize, + store_labels=self.cfg.store_labels, + random_crop=self.cfg.random_crop, + single_target=self.cfg.single_target, + tgt_dict=dicts[0], + add_decoder_target=self.cfg.add_decoder_target, + fine_tuning=self.cfg.fine_tuning, + tgt_lang_idx=tgt_lang_idx, + tokenizer=self.hubert_tokenizer, + ) + ) + if len(speech_datasets) > 1: + speech_dataset = ConcatDataset(speech_datasets) + else: + speech_dataset = speech_datasets[0] + + has_text = len(split.split('+')) > 1 + if not has_text: + assert speech_dataset is not None + self.datasets[split] = speech_dataset + return + + ### 2nd, create paired/mono text datasets using Langpairdataset + if split.split('+')[1] != '': + paired_splits = [paired_split for paired_split in split.split('+')[1].split(',') if paired_split != ''] + paired_datasets = [self.load_paired_dataset(paired_split) for paired_split in paired_splits] + else: + paired_splits, paired_datasets = [], [] + + if len(split.split('+')) > 2 and split.split('+')[2] != '': + mono_splits = [mono_split for mono_split in split.split('+')[2].split(',') if mono_split != ''] + mono_datasets = [self.load_paired_dataset(mono_split, truncate_source=self.cfg.text_cfg.truncate_mono_source) for mono_split in mono_splits] + else: + mono_splits, mono_datasets = [], [] + + assert len(mono_datasets + paired_datasets) > 0, f"split {split} has no text! you should check out for that" + + ### 3rd, if provided, create a supervised dataset with labeled data + if len(split.split('+')) > 3 and split.split('+')[3] != '': + assert len(paired_splits) > 0, f"supervised dataset can not be loaded without text paired dataset!" 
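+            # The supervised split reuses the target-side label extension of the
+            # first paired text split: e.g. for a paired split named
+            # "libritext.ltr-ltr" (see the example in load_paired_dataset), tgt
+            # becomes "ltr" and labels are read from "{sup_split}.ltr".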
+ tgt = paired_splits[0].rsplit('.', 1)[1].split("-")[1] + sup_split = split.split('+')[3] + + sup_dataset = HubertDataset( + f"{self.cfg.data}/{sup_split}.tsv", + sample_rate=self.cfg.sample_rate, + label_paths=[f"{self.get_label_dir()}/{sup_split}.{tgt}"], + label_rates=[-1], + pad_list=[self.text_dictionary.pad()], + eos_list=[self.text_dictionary.eos()], + label_processors=[LabelEncoder(self.text_dictionary)], + max_keep_sample_size=self.cfg.max_keep_size, + min_keep_sample_size=None, + max_sample_size=None, + pad_audio=True, + normalize=self.cfg.normalize, + store_labels=self.cfg.store_labels, + random_crop=False, + single_target=True, + tgt_dict=self.text_dictionary, + add_decoder_target=self.cfg.add_decoder_target, + fine_tuning=True, + tgt_lang_idx=None, + tokenizer=None, + ) + else: + sup_dataset = None + + ### 4th, compose a MultiCorpusDataset + dataset_dict, max_positions_dict, distributions, max_tokens_ratios = self.resample_multi_modality_dataset( + speech_dataset, sup_dataset, mono_datasets, paired_datasets, mono_splits, paired_splits, epoch=epoch, + ) + self.datasets[split] = MultiCorpusDataset( + dataset_dict, + max_positions=max_positions_dict, + distribution=distributions, + max_tokens_ratio=max_tokens_ratios, + seed=self.cfg.text_cfg.seed, + sort_indices=True, + ) + + + def max_positions(self) -> Tuple[int, int]: + return (sys.maxsize, sys.maxsize) + + def filter_indices_by_size( + self, indices: np.array, *args, **kwargs + ) -> np.array: + return indices + + def get_batch_iterator( + self, + dataset, + max_tokens=None, + max_sentences=None, + max_positions=None, + ignore_invalid_inputs=False, + required_batch_size_multiple=1, + seed=1, + num_shards=1, + shard_id=0, + num_workers=0, + epoch=1, + data_buffer_size=0, + disable_iterator_cache=False, + skip_remainder_batch=False, + grouped_shuffling=False, + update_epoch_batch_itr=False, + ): + """ + Get an iterator that yields batches of data from the given dataset. + Args: + dataset (~fairseq.data.FairseqDataset): dataset to batch + max_tokens (int, optional): max number of tokens in each batch + (default: None). + max_sentences (int, optional): max number of sentences in each + batch (default: None). + max_positions (optional): max sentence length supported by the + model (default: None). + ignore_invalid_inputs (bool, optional): don't raise Exception for + sentences that are too long (default: False). + required_batch_size_multiple (int, optional): require batch size to + be a multiple of N (default: 1). + seed (int, optional): seed for random number generator for + reproducibility (default: 1). + num_shards (int, optional): shard the data iterator into N + shards (default: 1). + shard_id (int, optional): which shard of the data iterator to + return (default: 0). + num_workers (int, optional): how many subprocesses to use for data + loading. 0 means the data will be loaded in the main process + (default: 0). + epoch (int, optional): the epoch to start the iterator from + (default: 1). + data_buffer_size (int, optional): number of batches to + preload (default: 0). + disable_iterator_cache (bool, optional): don't cache the + EpochBatchIterator (ignores `FairseqTask::can_reuse_epoch_itr`) + (default: False). + skip_remainder_batch (bool, optional): if set, discard the last + batch in each training epoch, as the last batch is often smaller than + local_batch_size * distributed_word_size (default: ``True``). 
+ grouped_shuffling (bool, optional): group batches with each groups + containing num_shards batches and shuffle groups. Reduces difference + between sequence lengths among workers for batches sorted by length. + update_epoch_batch_itr (bool optional): if true then donot use the cached + batch iterator for the epoch + + Returns: + ~fairseq.iterators.EpochBatchIterator: a batched iterator over the + given dataset split + """ + if self.fine_tuning or not isinstance(dataset, MultiCorpusDataset): + return super().get_batch_iterator( + dataset, + max_tokens=max_tokens, + max_sentences=max_sentences, + max_positions=max_positions, + ignore_invalid_inputs=ignore_invalid_inputs, + required_batch_size_multiple=required_batch_size_multiple, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + data_buffer_size=data_buffer_size, + disable_iterator_cache=disable_iterator_cache, + skip_remainder_batch=skip_remainder_batch, + grouped_shuffling=grouped_shuffling, + update_epoch_batch_itr=update_epoch_batch_itr, + ) + + can_reuse_epoch_itr = ( + not disable_iterator_cache + and not update_epoch_batch_itr + and self.can_reuse_epoch_itr(dataset) + ) + if can_reuse_epoch_itr and dataset in self.dataset_to_epoch_iter: + logger.debug("reusing EpochBatchIterator for epoch {}".format(epoch)) + return self.dataset_to_epoch_iter[dataset] + + assert isinstance(dataset, FairseqDataset) + + # initialize the dataset with the correct starting epoch + dataset.set_epoch(epoch) + + # get indices ordered by example size + with data_utils.numpy_seed(seed): + indices = dataset.ordered_indices() + + # filter examples that are too large + if max_positions is not None: + indices = self.filter_indices_by_size( + indices, dataset, max_positions, ignore_invalid_inputs + ) + + # create mini-batches with given size constraints + batch_sampler = dataset.get_batch_sampler( + indices, + num_shards, + seed, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + split_modality_batch=self.cfg.split_modality_batch, + ) + + # return a reusable, sharded iterator + epoch_iter = iterators.EpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_sampler=batch_sampler, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + buffer_size=data_buffer_size, + skip_remainder_batch=skip_remainder_batch, + disable_shuffling=True, + grouped_shuffling=grouped_shuffling, + ) + + if can_reuse_epoch_itr: + self.dataset_to_epoch_iter[dataset] = epoch_iter + + return epoch_iter + + def build_generator( + self, + models, + args, + seq_gen_cls=None, + extra_gen_cls_kwargs=None, + ): + """Build ED-CTC generator for finet-tuned ASR model""" + from speechut.squence_generator import SequenceGenerator + extra_gen_cls_kwargs = { + "ctc_weight": self.cfg.ctc_weight, + "lm_dict": Dictionary.load(os.path.join(self.cfg.data, self.cfg.lm_dict)), + **extra_gen_cls_kwargs + } + return super().build_generator( + models, args, seq_gen_cls=SequenceGenerator, extra_gen_cls_kwargs=extra_gen_cls_kwargs + ) + + @classmethod + def _get_size_ratios(cls, ids: List[str], sizes: List[int], alpha: float = 1.0): + """Size ratios for temperature-based sampling + (https://arxiv.org/abs/1907.05019)""" + _sizes = np.array(sizes) + prob = _sizes / _sizes.sum() + smoothed_prob = prob ** alpha + smoothed_prob = smoothed_prob / smoothed_prob.sum() + size_ratio = (smoothed_prob * _sizes.sum()) / _sizes + + o_str = 
str({_i: f"{prob[i]:.3f}" for i, _i in enumerate(ids)}) + logger.info(f"original sampling probability: {o_str}") + p_str = str({_i: f"{smoothed_prob[i]:.3f}" for i, _i in enumerate(ids)}) + logger.info(f"balanced sampling probability: {p_str}") + sr_str = str({_id: f"{size_ratio[i]:.3f}" for i, _id in enumerate(ids)}) + logger.info(f"balanced sampling size ratio: {sr_str}") + return size_ratio.tolist() + + def resample_multi_modality_dataset(self, speech_dataset, sup_dataset, mono_datasets, paired_datasets, mono_splits, paired_splits, epoch=1, train=True): + assert len(mono_datasets+paired_datasets) > 0, f"No text data loaded!" + + if len(mono_datasets) > 1 and self.cfg.text_sampling_alpha != 1.0: + size_ratios = self._get_size_ratios( + mono_splits, [len(s) for s in mono_datasets], alpha=self.cfg.text_sampling_alpha + ) + mono_datasets = [ + ResamplingDataset( + d, size_ratio=r, seed=0, epoch=epoch, replace=(r >= 1.0) + ) for d, r in zip(mono_datasets, size_ratios) + ] + + if len(paired_datasets) > 1 and self.cfg.text_sampling_alpha != 1.0: + size_ratios = self._get_size_ratios( + paired_splits, [len(s) for s in paired_datasets], alpha=self.cfg.text_sampling_alpha + ) + paired_datasets = [ + ResamplingDataset( + d, size_ratio=r, seed=0, epoch=epoch, replace=(r >= 1.0) + ) for d, r in zip(paired_datasets, size_ratios) + ] + + dataset_list = [speech_dataset, sup_dataset] + for datasets in [mono_datasets, paired_datasets]: + if len(datasets) > 1: + dataset_list.append(ConcatDataset(datasets)) + elif len(datasets) == 1: + dataset_list.append(datasets[0]) + else: + dataset_list.append(None) + + ### match speech/text datasets according to modality + dataset_dict = OrderedDict((name, d) for name, d in zip(["speech", "speech_sup", "text_mono", "text_paired"], dataset_list) if d is not None) + max_positions_dict = { + "speech": None, + "speech_sup": None, + "text_mono": (self.cfg.text_cfg.tokens_per_sample, self.cfg.text_cfg.tokens_per_sample), + "text_paired": (self.cfg.text_cfg.tokens_per_sample, self.cfg.text_cfg.tokens_per_sample), + } + max_positions_dict = OrderedDict((name, max_positions_dict[name]) for name in dataset_dict.keys()) + max_tokens_ratios_dict = { + "speech": 1.0, + "speech_sup": 1.0, + "text_mono": 1.0 / 320 / self.cfg.text_cfg.text_maxtokens_ratio, + "text_paired": 1.0 / 320 / self.cfg.text_cfg.text_maxtokens_ratio, + } + max_tokens_ratios = [max_tokens_ratios_dict[name] for name in dataset_dict.keys()] + dataset_lens = np.array([len(dataset) for dataset in dataset_dict.values()]) + dataset_avg_sample_lens = np.array([ + sum([dataset.num_tokens(i) for i in np.random.randint(low=0, high=len(dataset), size=10000)]) / 10000.0 + for dataset in dataset_dict.values() + ]) + + if not "speech" in dataset_dict: + distributions = [l / sum(dataset_lens) for l in dataset_lens] + else: + ## we just keep the batches of speech and non-speech the same, expand_coef is to ensure speech batches is less than others + first_ratio = dataset_lens[0] / sum(dataset_lens) + expand_coef = 1.2 if sup_dataset is None else 1.1 * sum(dataset_lens[0:2]) / dataset_lens[0] + distributions = [expand_coef * max_tokens_ratios[i] * dataset_avg_sample_lens[0] / l for (i, l) in enumerate(dataset_avg_sample_lens)] + distributions[0] = 1.0 + if sup_dataset is not None: + distributions[1] = dataset_lens[1] / dataset_lens[0] + distributions = [first_ratio * d for d in distributions] + + logging.info(f"Number samples of datasets is {dataset_lens}") + logging.info(f"Avg sample length of datasets is 
{dataset_avg_sample_lens}") + logging.info(f"Sampling distributions is {distributions}") + logging.info(f"Maxtokens ratio is {max_tokens_ratios}") + return dataset_dict, max_positions_dict, distributions, max_tokens_ratios + + def build_tokenizer(self, cfg=None): + logger.info(f"tokenizer: {self.cfg.hubert_tokenizer}") + if self.cfg.hubert_tokenizer != "none": + return encoders.build_bpe(Namespace(**{"bpe": self.cfg.hubert_tokenizer, "sentencepiece_model": self.cfg.sp_path})) + else: + return None + + def load_char_bart_dataset(self, split): + mono_dataset = data_utils.load_indexed_dataset( + f"{self.cfg.text_cfg.text_data}/{split}", + self.text_dictionary, + ) + mono_dataset = StripTokenDataset(mono_dataset, self.text_dictionary.eos()) + mono_dataset = maybe_shorten_dataset( + mono_dataset, + split, + self.cfg.text_cfg.shorten_data_split_list, + self.cfg.text_cfg.shorten_method, + self.cfg.text_cfg.tokens_per_sample - 2, + self.cfg.text_cfg.seed, + ) + logger.info("loaded {} samples from: {}".format(len(mono_dataset), mono_dataset)) + ### prepend bos and eos to dataset + mono_dataset = PrependTokenDataset(mono_dataset, self.text_dictionary.bos()) + mono_dataset = AppendTokenDataset(mono_dataset, self.text_dictionary.eos()) + mask_whole_words = ( + get_whole_word_mask(None, self.text_dictionary) + if self.cfg.text_cfg.mask_whole_words + else None + ) + lang=self.cfg.speech_tgt_lang + mono_dataset = DenoisingDataset( + mono_dataset, + mono_dataset.sizes, + self.text_dictionary, + self.mask_idx, + mask_whole_words, + shuffle=self.cfg.text_cfg.shuffle_instance, + seed=self.cfg.text_cfg.seed, + args=self.cfg.text_cfg, + tgt_lang_idx=_lang_token_index(self.text_dictionary, lang) if self.cfg.text_cfg.prepend_tgt_lang_tag else None, + ) + + return mono_dataset diff --git a/SpeechT5/VATLM/README.md b/SpeechT5/VATLM/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0594e6715ebc8346ec888fd388dd5775cfe99e38 --- /dev/null +++ b/SpeechT5/VATLM/README.md @@ -0,0 +1,135 @@ +# VATLM + + + [**VATLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning**](https://arxiv.org/abs/2211.11275) + + +- (Done) Nov. 2022: release the code and models +- Nov. 
2022: release preprint in [arXiv](https://arxiv.org/abs/2211.11275)
+
+## Pre-trained and Fine-tuned Models
+
+| Model | Pre-training Dataset | Fine-tuning Dataset | Download |
+| :---------: | :----------------------------------------: | :-------------------: | :----------------------------------------------------------: |
+| VatLM Base | LRS3 + paired audio+text+audio | - | [Google drive](https://drive.google.com/file/d/121ITJc22prpbd4sCy9bPWpdkKgGikkgm/view?usp=share_link) |
+| VatLM Base | LRS3 + paired audio+text+audio | LRS-30h audio-visual | [Google drive](https://drive.google.com/file/d/1Bfbq0G-tASw3YrI3rzdpYgTE-UV-YaN0/view?usp=share_link) |
+| VatLM Base | LRS3 + paired audio+text+audio | LRS-30h visual | [Google drive](https://drive.google.com/file/d/1qALD9obym0zCDoszVn2CzW0U3EUl-4v7/view?usp=share_link) |
+| VatLM Base | VoxCeleb2 + LRS3 + paired audio+text+audio | - | [Google drive](https://drive.google.com/file/d/1piae9Row25OEfAekVz5Bxb9YnIVyEP0A/view?usp=share_link) |
+| VatLM Base | VoxCeleb2 + LRS3 + paired audio+text+audio | LRS-30h audio-visual | [Google drive](https://drive.google.com/file/d/13JVuUi9gIIoUM888XcAOzvN7ioazn-cv/view?usp=share_link) |
+| VatLM Base | VoxCeleb2 + LRS3 + paired audio+text+audio | LRS-30h visual | [Google drive](https://drive.google.com/file/d/1pAQHf60HgqDORGzyqEjdGTIywLKO3Ko5/view?usp=share_link) |
+| VatLM Base | VoxCeleb2 + LRS3 + paired audio+text+audio | LRS-433h audio-visual | [Google drive](https://drive.google.com/file/d/1u9oMnivBelxznQcMDoM_u5EOfJuxnSuL/view?usp=share_link) |
+| VatLM Base | VoxCeleb2 + LRS3 + paired audio+text+audio | LRS-433h visual | [Google drive](https://drive.google.com/file/d/1g107k5tL3XyvevSe0BzMqYOQFyFQG7jf/view?usp=share_link) |
+| VatLM Large | VoxCeleb2 + LRS3 + paired audio+text+audio | - | [Google drive](https://drive.google.com/file/d/1_vbVFpKcaaPcCx2FtI-GyzVvxAhppg_b/view?usp=share_link) |
+| VatLM Large | VoxCeleb2 + LRS3 + paired audio+text+audio | LRS-30h audio-visual | [Google drive](https://drive.google.com/file/d/1LyTCxceTZIqjVdMY6hlJjWolaIAZ0Mhs/view?usp=share_link) |
+| VatLM Large | VoxCeleb2 + LRS3 + paired audio+text+audio | LRS-30h visual | [Google drive](https://drive.google.com/file/d/1CuyGg5O14F9Y_WCwpCVoKYbDKVtjBRQU/view?usp=share_link) |
+| VatLM Large | VoxCeleb2 + LRS3 + paired audio+text+audio | LRS-433h audio-visual | [Google drive](https://drive.google.com/file/d/12orvO3xBuzdUDrBOqjW0mdGhV2Kmsy0Q/view?usp=share_link) |
+| VatLM Large | VoxCeleb2 + LRS3 + paired audio+text+audio | LRS-433h visual | [Google drive](https://drive.google.com/file/d/17DDTUPs0BkaJtSUTiJHLBbymt2LCGo6e/view?usp=share_link) |
+
+
+
+## Setup
+
+To fine-tune or pre-train more models, please follow the instructions below.
+
+```bash
+git clone https://github.com/microsoft/SpeechT5.git
+cd SpeechT5/VATLM
+git submodule init && git submodule update
+
+cd fairseq && pip install --editable ./
+cd ../vat_hubert && pip install -r requirements.txt
+```
+
+## Data preparation
+
+1. For audio or visual data, please follow the steps of AV-HuBERT's [script](https://github.com/facebookresearch/av_hubert/tree/main/avhubert/preparation) to pre-process the data and get the corresponding `train.tsv`, `train.km` files.
+
+2. 
For unimodal audio data, the visual modality is replaced with a zero vector, and the features are extracted according to this [script](https://github.com/facebookresearch/av_hubert/tree/main/avhubert/preparation) and then kmeans [clustering](https://github.com/facebookresearch/av_hubert/tree/main/avhubert/clustering) is performed to get the corresponding labels. + +3. For unimodal text data, we use a small amount of pair text-audio data to obtain paired phone-unit data, and get the corresponding phoneme sequences by looking up the [lexicon](https://drive.google.com/file/d/1dh9NEx_cCF9_Aa0UcKyl9j00GXs6LmLQ/view?usp=sharing), and the unit data are obtained by extracting features and performing kmeans [clustering](https://github.com/facebookresearch/av_hubert/tree/main/avhubert/clustering). Then follow this [script](https://github.com/microsoft/SpeechT5/tree/main/SpeechLM#hidden-unit-tokenizer-for-text) to train the phone2unit model. + +## Pre-train + +- VatLM Base model (LRS3 + paired audio+text+audio) + + ```shell + cd VATLM/vat_hubert/vathubert/scripts/pretrain + ngpu=32 + updatefreq=1 + save_path=/path/to/save_path + + bash base_lsr3_pretrain_iter5.sh ${ngpu} ${updatefreq} ${save_path} + ``` + +- VatLM Base model (VoxCeleb2 + paired audio+text+audio) + + ```shell + cd VATLM/vat_hubert/vathubert/scripts/pretrain + ngpu=32 + updatefreq=1 + save_path=/path/to/save_path + + bash base_vox_pretrain_iter5.sh ${ngpu} ${updatefreq} ${save_path} + ``` + +- VatLM Large model (VoxCeleb2 + paired audio+text+audio) + + ```shell + cd VATLM/vat_hubert/vathubert/scripts/pretrain + ngpu=32 + updatefreq=2 + save_path=/path/to/save_path + + bash large_vox_pretrain_iter5.sh ${ngpu} ${updatefreq} ${save_path} + ``` + +## Fine-tune AVSR/VSR + +For example, the AVSR model can be obtained by fine-tuning the VatLM model using 30 hours of labeled data. + +```shell +cd VATLM/vat_hubert/vathubert/scripts/finetune_avsr +ngpu=8 +updatefreq=1 +save_path=/path/to/save_path + +bash base_lrs3_finetune30_av.sh ${ngpu} ${updatefreq} ${save_path} +``` + +## Decode + +For example, decoding the fine-tuned AVSR model. + +```sh +cd VATLM/vat_hubert/vathubert/ +data="test" +bash decode_avhubert_lrs3.sh ${data} +``` + +## License + +This project is licensed under the license found in the LICENSE file in the root directory of this source tree. +Portions of the source code are based on the [FAIRSEQ](https://github.com/pytorch/fairseq) and [av_hubert](https://github.com/facebookresearch/av_hubert) + +[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct) + +## Reference + +If you find our work is useful in your research, please cite the following paper: + +```bibtex +@article{zhu2022vatlm, + title={VATLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning}, + author={Qiushi Zhu and Long Zhou and Ziqiang Zhang and Shujie Liu and Binxing Jiao and Jie Zhang and Lirong Dai and Daxin Jiang and Jinyu Li and Furu Wei}, + year={2022}, + eprint={2211.11275}, + archivePrefix={arXiv}, +} +``` + +### Contact Information + +For help or issues using VatLM models, please submit a GitHub issue. + +For other communications related to VatLM, please contact Long Zhou (`lozhou@microsoft.com`). 
+ diff --git a/SpeechT5/VATLM/vat_hubert/requirements.txt b/SpeechT5/VATLM/vat_hubert/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a177256afd477279936017c3830ffefef5e6ccc3 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/requirements.txt @@ -0,0 +1,6 @@ +python-speech-features==0.6 +scipy==1.5.4 +opencv-python==4.5.4.60 +sentencepiece==0.1.96 +editdistance==0.6.0 +kaldiio==2.17.2 \ No newline at end of file diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/__init__.py b/SpeechT5/VATLM/vat_hubert/vathubert/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1e16cee6d480e2dd8a959a3b1c30d410cf5b008d --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# from .hubert import * # noqa +# from .hubert_asr import * # noqa +# from .hubert_dataset import * +# from .hubert_pretraining import * +# from .hubert_criterion import * +from . import data, tasks, criterions, models \ No newline at end of file diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_lrs3_30h_av.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_lrs3_30h_av.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdae821e66bc580e08c6f179ee94a2d799e586a7 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_lrs3_30h_av.yaml @@ -0,0 +1,118 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? + +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? + normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video","audio"] + image_aug: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 2000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 50000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 15000 + hold_steps: 0 + decay_steps: 20000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? 
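+  # Assumption: w2v_path points at the pre-trained VatLM checkpoint to
+  # fine-tune from; the ??? values are mandatory overrides, expected to be
+  # filled in by the fine-tuning scripts under vathubert/scripts/finetune_avsr.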
+ apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 6 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 30000 + share_decoder_input_output_embed: true + decoder_normalize_before: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_lrs3_30h_v.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_lrs3_30h_v.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1672dbb682d0d20cee32b111268952ff7dbf6f11 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_lrs3_30h_v.yaml @@ -0,0 +1,118 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? + +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? + normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video"] + image_aug: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 2000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 40000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 10000 + hold_steps: 0 + decay_steps: 20000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? + apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 6 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 24000 + share_decoder_input_output_embed: true + decoder_normalize_before: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_30h_av.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_30h_av.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdae821e66bc580e08c6f179ee94a2d799e586a7 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_30h_av.yaml @@ -0,0 +1,118 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? + +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? + normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video","audio"] + image_aug: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 2000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 50000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 15000 + hold_steps: 0 + decay_steps: 20000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? + apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 6 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 30000 + share_decoder_input_output_embed: true + decoder_normalize_before: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_30h_v.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_30h_v.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fdae821e66bc580e08c6f179ee94a2d799e586a7 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_30h_v.yaml @@ -0,0 +1,118 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? 
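+  # user_dir should point at the vathubert code directory so that fairseq can
+  # register the custom task/models/criterions (standard fairseq user_dir
+  # behaviour; see vathubert/__init__.py).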
+ +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? + normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video","audio"] + image_aug: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 2000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 50000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 15000 + hold_steps: 0 + decay_steps: 20000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? + apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 6 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 30000 + share_decoder_input_output_embed: true + decoder_normalize_before: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_433h_av.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_433h_av.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f39bda27b7b2dd0d217864bf2d27bec27ae59f65 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_433h_av.yaml @@ -0,0 +1,118 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? + +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? 
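+  # tokenizer_bpe_model: presumably the sentencepiece model used to tokenize
+  # the "wrd" word targets (paired with tokenizer_bpe_name below); another
+  # required override.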
+ normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video","audio"] + image_aug: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 2000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 60000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 20000 + hold_steps: 0 + decay_steps: 40000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? + apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 6 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 48000 + share_decoder_input_output_embed: true + decoder_normalize_before: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_433h_v.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_433h_v.yaml new file mode 100644 index 0000000000000000000000000000000000000000..773d638f28809f3dfbfbcf34ab08c0ed4729a133 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/base_vox_433h_v.yaml @@ -0,0 +1,118 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? + +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? 
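`freeze_finetune_updates` (48000 in `base_vox_433h_av` above, smaller in the 30h configs) follows the usual fairseq/AV-HuBERT fine-tuning convention: the pre-trained encoder stays frozen while the freshly initialised seq2seq decoder warms up, and everything is trained jointly afterwards. A minimal sketch, assuming that convention rather than quoting the model code in this diff:

```python
import torch

def maybe_freeze_encoder(encoder: torch.nn.Module, num_updates: int,
                         freeze_finetune_updates: int = 48000) -> bool:
    """Freeze the pre-trained encoder for the first N updates, then unfreeze.
    Returns True once fine-tuning of the encoder has started."""
    finetune = num_updates >= freeze_finetune_updates
    for p in encoder.parameters():
        p.requires_grad = finetune
    return finetune

# Hypothetical call site: invoked once per training update with the current step.
```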
+ normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video"] + image_aug: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 2000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 30000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 10000 + hold_steps: 0 + decay_steps: 20000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? + apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 6 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 18000 + share_decoder_input_output_embed: true + decoder_normalize_before: true + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_30h_av.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_30h_av.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f712945a2616fef0c9a3f6bb40b1f7299a8c2182 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_30h_av.yaml @@ -0,0 +1,124 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? + +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? + normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video","audio"] + image_aug: true + pad_audio: true + random_crop: false + # noise_prob: 0.25 + # noise_snr: 0 + # noise_wav: ??? 
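The `???` placeholders in these YAML files are OmegaConf's MISSING marker: a run fails unless `task.data`, `task.label_dir`, `model.w2v_path` and the other marked fields are supplied on the command line or in code. A small check, with hypothetical paths and the config path assumed to be resolved from the `vat_hubert` directory:

```python
from omegaconf import OmegaConf

cfg = OmegaConf.load("vathubert/conf/finetune/base_vox_433h_v.yaml")  # adjust path
assert OmegaConf.is_missing(cfg.task, "data")       # '???' marks a required value
cfg.task.data = "/path/to/433h/tsv_dir"             # hypothetical locations
cfg.model.w2v_path = "/path/to/pretrained_ckpt.pt"
print(cfg.task.modalities)                          # ['video'] for the *_v configs
```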
+ +dataset: + num_workers: 6 + max_tokens: 1000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 60000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 20000 + hold_steps: 0 + decay_steps: 40000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? + apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 9 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 48000 + share_decoder_input_output_embed: true + decoder_normalize_before: true + decoder_embed_dim: 1024 + decoder_ffn_embed_dim: 4096 + decoder_attention_heads: 8 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_30h_v.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_30h_v.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f40166828236122a8deca9514680b65a1cd3251d --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_30h_v.yaml @@ -0,0 +1,121 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? + +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? + normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video"] + image_aug: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 1000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 18000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 6000 + hold_steps: 0 + decay_steps: 12000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? 
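Fine-tuning uses fairseq's `label_smoothed_cross_entropy` criterion with `label_smoothing: 0.1`. The function below is a minimal sketch of that loss, shown only to make the smoothing term concrete; the padding index of 1 is the common fairseq default and is an assumption here.

```python
import torch
import torch.nn.functional as F

def label_smoothed_nll(lprobs, target, eps=0.1, ignore_index=1):
    """lprobs: [N, V] log-probabilities, target: [N] token ids."""
    nll = -lprobs.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
    smooth = -lprobs.sum(dim=-1)                     # uniform prior over the vocab
    pad = target.eq(ignore_index)
    nll, smooth = nll.masked_fill(pad, 0.0), smooth.masked_fill(pad, 0.0)
    eps_i = eps / (lprobs.size(-1) - 1)
    return ((1.0 - eps - eps_i) * nll + eps_i * smooth).sum()

lprobs = F.log_softmax(torch.randn(6, 10), dim=-1)
print(label_smoothed_nll(lprobs, torch.randint(0, 10, (6,))))
```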
+ apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 9 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 14400 + share_decoder_input_output_embed: true + decoder_normalize_before: true + decoder_embed_dim: 1024 + decoder_ffn_embed_dim: 4096 + decoder_attention_heads: 8 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_433h_av.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_433h_av.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd08081605fbfd85eeb774087efc2d3c4d740242 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_433h_av.yaml @@ -0,0 +1,122 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? + +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? + normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video","audio"] + image_aug: true + pad_audio: true + random_crop: false + + +dataset: + num_workers: 6 + max_tokens: 1000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 60000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 20000 + hold_steps: 0 + decay_steps: 40000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? 
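`mask_prob: 0.75` and `mask_length: 10` describe span masking over input frames (only active when `apply_mask` is enabled; the `mask_channel_*` variants work the same way along the feature axis). Below is a simplified sampler in the spirit of fairseq's `compute_mask_indices`, which has more options (overlap handling, alternative length distributions) than shown here.

```python
import numpy as np

def sample_span_mask(seq_len, mask_prob=0.75, mask_length=10, seed=1337):
    """Simplified span-mask sampler; not the exact fairseq implementation."""
    rng = np.random.default_rng(seed)
    num_spans = int(mask_prob * seq_len / float(mask_length) + rng.random())
    mask = np.zeros(seq_len, dtype=bool)
    if num_spans > 0:
        starts = rng.choice(seq_len - mask_length, size=num_spans, replace=False)
        for s in starts:
            mask[s:s + mask_length] = True
    return mask

m = sample_span_mask(500)
print(f"{m.sum()} of {m.size} frames masked")
```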
+ apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 9 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 48000 + share_decoder_input_output_embed: true + decoder_normalize_before: true + decoder_embed_dim: 1024 + decoder_ffn_embed_dim: 4096 + decoder_attention_heads: 8 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_433h_v.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_433h_v.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e31c848866567f6d927ef240f797ba37cc7aa787 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/finetune/large_vox_433h_v.yaml @@ -0,0 +1,121 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + user_dir: ??? + +checkpoint: + save_interval: 2 + keep_interval_updates: 1 + no_epoch_checkpoints: true + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: c10d + find_unused_parameters: true + distributed_world_size: 8 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + is_s2s: true + data: ??? + label_dir: ??? + tokenizer_bpe_model: ??? + normalize: true # must be consistent with pre-training + labels: ["wrd"] + single_target: true + fine_tuning: true + stack_order_audio: 4 + tokenizer_bpe_name: sentencepiece + max_sample_size: 500 + modalities: ["video"] + image_aug: true + pad_audio: true + random_crop: false + +dataset: + num_workers: 6 + max_tokens: 1000 + validate_after_updates: 0 + validate_interval: 2 + train_subset: train + valid_subset: valid + +criterion: + _name: label_smoothed_cross_entropy + report_accuracy: true + label_smoothing: 0.1 + +optimization: + max_update: 30000 + lr: [0.001] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + +lr_scheduler: + _name: tri_stage + warmup_steps: 10000 + hold_steps: 0 + decay_steps: 20000 + final_lr_scale: 0.05 + +model: + _name: vat_hubert_seq2seq + w2v_path: ??? 
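A quick way to read the large-model fine-tuning batch settings: `dataset.max_tokens` counts input frames per GPU, so together with `distributed_world_size` and `update_freq` it fixes the per-update budget. The arithmetic below assumes the 25 Hz frame rate used by the pre-training configs in this diff; it is a back-of-the-envelope estimate, not something computed by the training code.

```python
max_tokens = 1000          # frames per GPU per batch (dataset.max_tokens)
world_size = 8             # distributed_training.distributed_world_size
update_freq = 1            # gradient accumulation (optimization.update_freq)

frames_per_update = max_tokens * world_size * update_freq
seconds_per_update = frames_per_update / 25.0   # assuming 25 Hz frames
print(frames_per_update, "frames, roughly", seconds_per_update, "s of input per update")
```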
+ apply_mask: false + mask_selection: static + mask_length: 10 + mask_other: 0 + mask_prob: 0.75 + mask_channel_selection: static + mask_channel_length: 64 + mask_channel_other: 0 + mask_channel_prob: 0.5 + layerdrop: 0.1 + dropout: 0.0 + activation_dropout: 0.1 + attention_dropout: 0.0 + feature_grad_mult: 1.0 + decoder_layers: 9 + decoder_dropout: 0.1 + decoder_attention_dropout: 0.0 + decoder_activation_dropout: 0.1 + freeze_finetune_updates: 18000 + share_decoder_input_output_embed: true + decoder_normalize_before: true + decoder_embed_dim: 1024 + decoder_ffn_embed_dim: 4096 + decoder_attention_heads: 8 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/pretrain/base_lrs3_iter5.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/pretrain/base_lrs3_iter5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b67c97df3a960f0a758300b058db2fd3dec974e0 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/pretrain/base_lrs3_iter5.yaml @@ -0,0 +1,113 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + user_dir: ??? + empty_cache_freq: 10000 + +checkpoint: + save_interval: 5 + save_interval_updates: 25000 + keep_interval_updates: 1 + no_epoch_checkpoints: false + + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 32 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + data: ??? + label_dir: ??? + labels: ["km"] + label_rate: ${model.label_rate} + sample_rate: 25 + max_sample_size: 500 + min_sample_size: 5 + pad_audio: true + random_crop: false + normalize: true + stack_order_audio: 4 + # stack_order: 1 + input_modality: image + image_aug: true + +dataset: + num_workers: 6 + max_tokens: 1000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + validate_interval_updates: 10000 + +criterion: + _name: vat_hubert + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + +optimization: + max_update: 400000 + lr: [0.0005] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: vat_hubert + label_rate: ??? + skip_masked: false + skip_nomask: false + modality_dropout: 0.5 + audio_dropout: 0.5 + modality_fuse: concat + selection_type: same_seq + masking_type: input + mask_prob_image: 0.3 + mask_length_image: 10 + mask_prob_audio: 0.8 + mask_length_audio: 10 + extractor_mode: default + # conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + wav_input: false + layer_norm_first: true + audio_feat_dim: 104 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? 
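Pre-training switches to fairseq's `polynomial_decay` scheduler (`warmup_updates: 32000`, `lr: [0.0005]` in the LRS3 base config that follows). The sketch assumes the fairseq defaults of power 1.0, end learning rate 0.0, and a total update count equal to `max_update`; those defaults are not spelled out in this diff.

```python
def polynomial_decay_lr(step, peak_lr=0.0005, warmup=32000,
                        total=400000, end_lr=0.0, power=1.0):
    """Sketch of polynomial_decay: linear warmup, then polynomial decay to end_lr."""
    if step <= warmup:
        return peak_lr * step / max(1, warmup)
    remaining = max(0.0, (total - step) / (total - warmup))
    return (peak_lr - end_lr) * remaining ** power + end_lr

print(polynomial_decay_lr(32000), polynomial_decay_lr(216000), polynomial_decay_lr(400000))
```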
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/pretrain/base_vox_iter5.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/pretrain/base_vox_iter5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f349a2bfbe6b74bc8435c25cac564be429986e7 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/pretrain/base_vox_iter5.yaml @@ -0,0 +1,113 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + user_dir: ??? + empty_cache_freq: 10000 + +checkpoint: + save_interval: 1 + save_interval_updates: 10000 + keep_interval_updates: 1 + no_epoch_checkpoints: false + + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 32 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + data: ??? + label_dir: ??? + labels: ["km"] + label_rate: ${model.label_rate} + sample_rate: 25 + max_sample_size: 500 + min_sample_size: 5 + pad_audio: true + random_crop: false + normalize: true + stack_order_audio: 4 + # stack_order: 1 + input_modality: image + image_aug: true + +dataset: + num_workers: 6 + max_tokens: 1000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + validate_interval_updates: 10000 + +criterion: + _name: vat_hubert + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + +optimization: + max_update: 400000 + lr: [0.002] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 64000 + +model: + _name: vat_hubert + label_rate: ??? + skip_masked: false + skip_nomask: false + modality_dropout: 0.5 + audio_dropout: 0.5 + modality_fuse: concat + selection_type: same_seq + masking_type: input + mask_prob_image: 0.3 + mask_length_image: 10 + mask_prob_audio: 0.8 + mask_length_audio: 10 + extractor_mode: default + # conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + wav_input: false + layer_norm_first: true + audio_feat_dim: 104 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/pretrain/large_vox_iter5.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/pretrain/large_vox_iter5.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e543801170b6931225548569a232f7c694580a18 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/pretrain/large_vox_iter5.yaml @@ -0,0 +1,118 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + user_dir: ??? + empty_cache_freq: 10000 + +checkpoint: + save_interval: 2 + save_interval_updates: 10000 + keep_interval_updates: 1 + no_epoch_checkpoints: false + + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 64 + # distributed_port: 29671 + nprocs_per_node: 8 + +task: + _name: vat_hubert_pretraining + data: ??? + label_dir: ??? 
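`modality_fuse: concat` together with `modality_dropout: 0.5` and `audio_dropout: 0.5` means the audio and video feature streams are concatenated, and during pre-training one stream is sometimes zeroed out so the model cannot lean on a single modality. The sketch below is modeled on the AV-HuBERT convention this code builds on; the tensor shapes and the call signature are hypothetical.

```python
import torch

def fuse_modalities(feat_audio, feat_video, modality_dropout=0.5,
                    audio_dropout=0.5, training=True):
    """Concat fusion with modality dropout (sketch, not the model code)."""
    if training and torch.rand(1).item() < modality_dropout:
        if torch.rand(1).item() < audio_dropout:
            feat_audio = torch.zeros_like(feat_audio)   # drop the audio stream
        else:
            feat_video = torch.zeros_like(feat_video)   # drop the video stream
    return torch.cat([feat_audio, feat_video], dim=1)   # [B, C_a + C_v, T]

fused = fuse_modalities(torch.randn(2, 768, 100), torch.randn(2, 768, 100))
print(fused.shape)
```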
+ labels: ["km"] + label_rate: ${model.label_rate} + sample_rate: 25 + max_sample_size: 500 + min_sample_size: 5 + pad_audio: true + random_crop: false + normalize: true + stack_order_audio: 4 + # stack_order: 1 + input_modality: image + image_aug: true + # max_trim_sample_size: 400 + +dataset: + num_workers: 6 + max_tokens: 1000 + skip_invalid_size_inputs_valid_test: true + validate_interval: 5 + validate_interval_updates: 10000 + +criterion: + _name: vat_hubert + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + +optimization: + max_update: 600000 + lr: [0.002] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 48000 + +model: + _name: vat_hubert + label_rate: ??? + skip_masked: false + skip_nomask: false + modality_dropout: 0.5 + audio_dropout: 0.5 + modality_fuse: concat + selection_type: same_seq + masking_type: input + mask_prob_image: 0.3 + mask_length_image: 5 + mask_prob_audio: 0.8 + mask_length_audio: 10 + extractor_mode: default + # conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + wav_input: false + layer_norm_first: true + audio_feat_dim: 104 + encoder_layers: 24 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/conf/s2s_decode.yaml b/SpeechT5/VATLM/vat_hubert/vathubert/conf/s2s_decode.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce9279f49255cf3721e946bd5e57c4066ca1e0d8 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/conf/s2s_decode.yaml @@ -0,0 +1,23 @@ +common: + user_dir: ??? + +generation: + beam: 50 + max_len_a: 1.0 + max_len_b: 0 + lenpen: 1.0 + lm_weight: 0 + +common_eval: + results_path: ??? + path: ??? + +dataset: + max_tokens: 1000 + gen_subset: valid + num_workers: 0 + +override: + noise_prob: 0.0 + noise_snr: 0 + modalities: ??? diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/criterions/__init__.py b/SpeechT5/VATLM/vat_hubert/vathubert/criterions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b5266f3f9f137067b059f69d15d766a951247422 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/criterions/__init__.py @@ -0,0 +1,9 @@ +import importlib +import os + +for file in os.listdir(os.path.dirname(__file__)): + if file.endswith(".py") and not file.startswith("_"): + criterion_name = file[: file.find(".py")] + importlib.import_module( + "vathubert.criterions." 
+ criterion_name + ) diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/criterions/vathubert_criterion.py b/SpeechT5/VATLM/vat_hubert/vathubert/criterions/vathubert_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..ec6fc59da699d5d3deb415cd5e963010c30beab6 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/criterions/vathubert_criterion.py @@ -0,0 +1,408 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- +import math +import re +from dataclasses import dataclass, field +from typing import List, Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.dataclass import FairseqDataclass + + +@dataclass +class VATHubertCriterionConfig(FairseqDataclass): + pred_masked_weight: float = field( + default=1.0, + metadata={"help": "weight for predictive loss for masked frames"}, + ) + pred_nomask_weight: float = field( + default=0.0, + metadata={"help": "weight for predictive loss for unmasked frames"}, + ) + loss_weights: Optional[List[float]] = field( + default=None, + metadata={"help": "weights for additional loss terms (not first one)"}, + ) + log_keys: List[str] = field( + default_factory=lambda: [], + metadata={"help": "output keys to log"}, + ) + banlance_loss_weights: Optional[List[float]] = field( + default=None, + metadata={"help": "weights for additional loss terms (not first one)"}, + ) + +@register_criterion("vat_hubert", dataclass=VATHubertCriterionConfig) +class VATHubertCriterion(FairseqCriterion): + def __init__(self, task, pred_masked_weight, pred_nomask_weight, banlance_loss_weights, loss_weights=None, log_keys=None): + super().__init__(task) + self.pred_masked_weight = pred_masked_weight + self.pred_nomask_weight = pred_nomask_weight + self.loss_weights = loss_weights + self.log_keys = [] if log_keys is None else log_keys + self.banlance_loss_weights = banlance_loss_weights + + def forward(self, model, sample, reduce=True, log_pred=False): + """Compute the loss for the given sample. + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + + videoaudio_sample = sample.get("videoaudio", None) + audiotext_sample = sample.get("audiotext", None) + onlytext_sample = sample.get("onlytext", None) + onlyaudio_sample = sample.get("onlyaudio", None) + + + loss = 0. + loss1 = 0. + loss2 = 0. + loss3 = 0. + loss4 = 0. 
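The forward pass that follows handles up to four batch types (video+audio, audio+text, text-only, audio-only) and applies the same masked-prediction loss pattern to each, weighted by `pred_masked_weight` (1.0) and `pred_nomask_weight` (0.0) from the criterion config. A condensed sketch of that repeated pattern, with dictionary keys taken from the code below; the empty-logits accuracy bookkeeping in each branch likewise reduces to a pair of counters, `(correct, count)`, and should return `(0, 0)` when there are no masked frames.

```python
import torch.nn.functional as F

def corpus_loss(net_output, pred_masked_weight=1.0, pred_nomask_weight=0.0):
    """Summed CE over masked-frame logits, optionally over unmasked frames."""
    loss, sample_size = 0.0, 0
    if pred_masked_weight > 0:
        loss = loss + pred_masked_weight * sum(
            F.cross_entropy(logp, targ, reduction="sum")
            for logp, targ in zip(net_output["logit_m_list"], net_output["target_m_list"]))
        sample_size += net_output["target_m_list"][0].numel()
    if pred_nomask_weight > 0:
        loss = loss + pred_nomask_weight * sum(
            F.cross_entropy(logp, targ, reduction="sum")
            for logp, targ in zip(net_output["logit_u_list"], net_output["target_u_list"]))
        sample_size += net_output["target_u_list"][0].numel()
    return loss, sample_size
```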
+ sample_size = 0 + logging_output = {} + reduction = "sum" if reduce else "none" + + if videoaudio_sample is not None: + # print("videoaudio_sample") + net_output = model(target_list=videoaudio_sample["target_list"], **videoaudio_sample["net_input"]) + + loss_m_list = [] + logp_m_list, targ_m_list = net_output['logit_m_list'], net_output['target_m_list'] + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"loss_m_videoaudio_{i}"] = loss_m.detach().item() + if self.pred_masked_weight > 0: + loss1 += self.pred_masked_weight * sum(loss_m_list) + sample_size += targ_m_list[0].numel() + + loss_u_list = [] + logp_u_list, targ_u_list = net_output['logit_u_list'], net_output['target_u_list'] + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"loss_u_videoaudio_{i}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss1 += self.pred_nomask_weight * sum(loss_u_list) + sample_size += targ_u_list[0].numel() + + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len(self.loss_weights), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss1 += p + logging_output[f"loss_videoaudio_{n}"] = p.item() + + logging_output = { + "loss_video_audio": loss1.item() if reduce else loss1, + **logging_output, + } + + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + with torch.no_grad(): + for i, logp_m in enumerate(logp_m_list): + # corr_m, count_m = compute_correct(logp_m) + if logp_m.numel() == 0: + corr_m, count_m = 0 + else: + corr_m, count_m = (logp_m.argmax(dim=-1)==targ_m_list[i]).sum().item(), len(targ_m_list[i]) + logging_output[f"correct_m_videoaudio_{i}"] = corr_m + logging_output[f"count_m_videoaudio_{i}"] = count_m + + for i, logp_u in enumerate(logp_u_list): + if logp_u.numel() == 0: + corr_u, count_u = 0, 0 + else: + corr_u, count_u = (logp_u.argmax(dim=-1)==targ_u_list[i]).sum().item(), len(targ_u_list[i]) + logging_output[f"correct_u_videoaudio_{i}"] = corr_u + logging_output[f"count_u_videoaudio_{i}"] = count_u + + + if audiotext_sample is not None: + # print("audiotext_sample") + net_output = model(target_list=audiotext_sample["target_list"], targets_phone_list=audiotext_sample["targets_phone_list"], **audiotext_sample["net_input"]) + + loss_m_list = [] + logp_m_list, targ_m_list = net_output['logit_m_list'], net_output['target_m_list'] + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"loss_m_audiotext_{i}"] = loss_m.detach().item() + + + if self.pred_masked_weight > 0: + loss2 += self.pred_masked_weight * sum(loss_m_list) + sample_size += targ_m_list[0].numel() + + loss_u_list = [] + logp_u_list, targ_u_list = net_output['logit_u_list'], net_output['target_u_list'] + for i, (logp_u, targ_u) 
in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"loss_u_audiotext_{i}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss2 += self.pred_nomask_weight * sum(loss_u_list) + sample_size += targ_u_list[0].numel() + + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len(self.loss_weights), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss2 += p + logging_output[f"loss_audiotext_{n}"] = p.item() + + + logging_output = { + "loss_audiotext": loss2.item() if reduce else loss2, + **logging_output, + } + + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + with torch.no_grad(): + for i, logp_m in enumerate(logp_m_list): + # corr_m, count_m = compute_correct(logp_m) + if logp_m.numel() == 0: + corr_m, count_m = 0 + else: + corr_m, count_m = (logp_m.argmax(dim=-1)==targ_m_list[i]).sum().item(), len(targ_m_list[i]) + logging_output[f"correct_m_audiotext_{i}"] = corr_m + logging_output[f"count_m_audiotext_{i}"] = count_m + + for i, logp_u in enumerate(logp_u_list): + if logp_u.numel() == 0: + corr_u, count_u = 0, 0 + else: + corr_u, count_u = (logp_u.argmax(dim=-1)==targ_u_list[i]).sum().item(), len(targ_u_list[i]) + logging_output[f"correct_u_audiotext_{i}"] = corr_u + logging_output[f"count_u_audiotext_{i}"] = count_u + + + if onlytext_sample is not None: + # print("onlytext_sample") + net_output = model(target_list=onlytext_sample["target_list"], extra_text_phone_list=onlytext_sample["extra_text_phone_list"], **onlytext_sample["net_input"]) + + loss_m_list = [] + logp_m_list, targ_m_list = net_output['logit_m_list'], net_output['target_m_list'] + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"loss_m_onlytext_{i}"] = loss_m.detach().item() + + + if self.pred_masked_weight > 0: + loss3 += self.pred_masked_weight * sum(loss_m_list) + sample_size += targ_m_list[0].numel() + + loss_u_list = [] + logp_u_list, targ_u_list = net_output['logit_u_list'], net_output['target_u_list'] + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"loss_u_onlytext_{i}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss3 += self.pred_nomask_weight * sum(loss_u_list) + sample_size += targ_u_list[0].numel() + + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len(self.loss_weights), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if 
coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss3 += p + logging_output[f"loss_onlytext_{n}"] = p.item() + + + logging_output = { + "loss_onlytext": loss3.item() if reduce else loss3, + **logging_output, + } + + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + with torch.no_grad(): + for i, logp_m in enumerate(logp_m_list): + # corr_m, count_m = compute_correct(logp_m) + if logp_m.numel() == 0: + corr_m, count_m = 0 + else: + corr_m, count_m = (logp_m.argmax(dim=-1)==targ_m_list[i]).sum().item(), len(targ_m_list[i]) + logging_output[f"correct_m_onlytext_{i}"] = corr_m + logging_output[f"count_m_onlytext_{i}"] = count_m + + for i, logp_u in enumerate(logp_u_list): + if logp_u.numel() == 0: + corr_u, count_u = 0, 0 + else: + corr_u, count_u = (logp_u.argmax(dim=-1)==targ_u_list[i]).sum().item(), len(targ_u_list[i]) + logging_output[f"correct_u_onlytext_{i}"] = corr_u + logging_output[f"count_u_onlytext_{i}"] = count_u + + + if onlyaudio_sample is not None: + # print("onlytext_sample") + net_output = model(target_list=onlyaudio_sample["target_list"], **onlyaudio_sample["net_input"]) + + loss_m_list = [] + logp_m_list, targ_m_list = net_output['logit_m_list'], net_output['target_m_list'] + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"loss_m_onlyaudio_{i}"] = loss_m.detach().item() + + + if self.pred_masked_weight > 0: + loss4 += self.pred_masked_weight * sum(loss_m_list) + sample_size += targ_m_list[0].numel() + + loss_u_list = [] + logp_u_list, targ_u_list = net_output['logit_u_list'], net_output['target_u_list'] + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"loss_u_onlyaudio_{i}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss4 += self.pred_nomask_weight * sum(loss_u_list) + sample_size += targ_u_list[0].numel() + + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len(self.loss_weights), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss4 += p + logging_output[f"loss_onlyaudio_{n}"] = p.item() + + + logging_output = { + "loss_onlyaudio": loss4.item() if reduce else loss4, + **logging_output, + } + + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + with torch.no_grad(): + for i, logp_m in enumerate(logp_m_list): + # corr_m, count_m = compute_correct(logp_m) + if logp_m.numel() == 0: + corr_m, count_m = 0 + else: + corr_m, count_m = (logp_m.argmax(dim=-1)==targ_m_list[i]).sum().item(), len(targ_m_list[i]) + logging_output[f"correct_m_onlyaudio_{i}"] = corr_m + logging_output[f"count_m_onlyaudio_{i}"] = count_m + + for i, logp_u in enumerate(logp_u_list): + if logp_u.numel() == 0: + corr_u, count_u = 0, 0 + else: + corr_u, count_u = (logp_u.argmax(dim=-1)==targ_u_list[i]).sum().item(), len(targ_u_list[i]) + 
logging_output[f"correct_u_onlyaudio_{i}"] = corr_u + logging_output[f"count_u_onlyaudio_{i}"] = count_u + + + + loss = loss1 + loss2 + self.banlance_loss_weights[0] * loss3 + self.banlance_loss_weights[1] * loss4 + + logging_output = { + "loss": loss.item() if reduce else loss, + "ntokens": sample_size, + "nsentences": sample["videoaudio"]["id"].numel(), + "sample_size": sample_size, + **logging_output, + } + + return loss, sample_size, logging_output + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training (copied from normal cross entropy).""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar("loss", loss_sum / sample_size / math.log(2), sample_size, round=3) + if sample_size != ntokens: + metrics.log_scalar("nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3) + metrics.log_derived("ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)) + else: + metrics.log_derived("ppl", lambda meters: utils.get_perplexity(meters["loss"].avg)) + + counts = {} + for lk in logging_outputs[0].keys(): + if lk.startswith("count_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val) + counts[lk] = val + + for lk in logging_outputs[0].keys(): + if lk.startswith("loss_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val / sample_size / math.log(2), round=3) + elif lk.startswith("correct_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val / counts[re.sub("correct", "count", lk)]) + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + raise NotImplementedError() + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. 
+ """ + return False diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/data/audiohubert_dataset.py b/SpeechT5/VATLM/vat_hubert/vathubert/data/audiohubert_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..90c2e0748c65418a92e483a1e3f3cc9189b55b35 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/data/audiohubert_dataset.py @@ -0,0 +1,509 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import itertools +import logging +import os +import sys +import time +from typing import Any, List, Optional, Union + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils +from fairseq.data.fairseq_dataset import FairseqDataset +# from python_speech_features import logfbank +from scipy.io import wavfile +import kaldiio + +DBG=True if len(sys.argv) == 1 else False + +if DBG: + import utils as custom_utils + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "DEBUG").upper(), + stream=sys.stdout, + ) +else: + from . import utils as custom_utils + +logger = logging.getLogger(__name__) + + +def load_audio(manifest_path, max_keep, min_keep, frame_rate, label_paths, label_rates, tol=0.1): + def is_audio_label_aligned(audio_dur, label_durs): + return all([abs(audio_dur - label_dur) max_keep: + n_long += 1 + elif (not is_seq_label) and (not is_audio_label_aligned(sz/frame_rate, dur_from_label_list[ind])): + n_unaligned += 1 + else: + audio_path = items[0] + names.append(os.path.join(root, audio_path)) + inds.append(ind) + sizes.append(sz) + tot = ind + 1 + logger.info( + ( + f"label_rates={label_rates}, " + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long and {n_unaligned} unaligned, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, names, inds, tot, sizes + + + +def load_label(label_path, inds, tot): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + +def load_phone_label(tsv, inds, tot): + with open(tsv) as f: + labels = [line.rstrip().split("\t")[-1] for line in f.readlines()[1:]] + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds, tot): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + assert ( + len(code_lengths) == tot + ), f"number of labels does not match ({len(code_lengths)} != {tot})" + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + +def verify_label_lengths( + audio_sizes, + audio_rate, + label_path, + label_rate, + inds, + tot, + tol=0.1, # tolerance in seconds +): + if 
label_rate < 0: + logger.info(f"{label_path} is sequence label. skipped") + return + + with open(label_path) as f: + lengths = [len(line.rstrip().split()) for line in f] + assert len(lengths) == tot + lengths = [lengths[i] for i in inds] + num_invalid = 0 + for i, ind in enumerate(inds): + dur_from_audio = audio_sizes[i] / audio_rate + dur_from_label = lengths[i] / label_rate + if abs(dur_from_audio - dur_from_label) > tol: + logger.warning( + ( + f"audio and label duration differ too much " + f"(|{dur_from_audio} - {dur_from_label}| > {tol}) " + f"in line {ind+1} of {label_path}. Check if `label_rate` " + f"is correctly set (currently {label_rate}). " + f"num. of samples = {audio_sizes[i]}; " + f"label length = {lengths[i]}" + ) + ) + num_invalid += 1 + if num_invalid > 0: + logger.warning( + f"total {num_invalid} (audio, label) pairs with mismatched lengths" + ) + + +class AudioHubertDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + phone_sequence_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + single_target: bool = False, + stack_order_audio: int=1, + skip_verify: bool=False, + is_s2s=False, + ): + self.label_rates = ( + [label_rates for _ in range(len(label_paths))] + if isinstance(label_rates, int) + else label_rates + ) + self.audio_root, self.names, inds, tot, self.sizes = load_audio(manifest_path, max_keep_sample_size, min_keep_sample_size, frame_rate=sample_rate, label_paths=label_paths, label_rates=self.label_rates) + self.sample_rate = sample_rate + self.stack_order_audio = stack_order_audio + self.shuffle = shuffle + + self.num_labels = len(label_paths) + self.pad_list = pad_list + self.eos_list = eos_list + self.label_processors = label_processors + self.phone_processors = phone_sequence_processors + self.single_target = single_target + self.store_labels = store_labels + self.is_s2s = is_s2s + + assert self.single_target == (self.label_rates[0] == -1), f"single target should be equivalent to sequence label (label_rate==-1)" + if store_labels: + self.label_list = [load_label(p, inds, tot) for p in label_paths] + self.phone_list = [load_phone_label(p, inds, tot) for p in [manifest_path]] + + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds, tot) for p in label_paths + ] + assert ( + label_processors is None + or len(label_processors) == self.num_labels + ) + if not skip_verify: + for label_path, label_rate in zip(label_paths, self.label_rates): + verify_label_lengths(self.sizes, self.sample_rate, label_path, label_rate, inds, tot) + else: + logger.info(f"Skip label alignment verifying") + + self.max_sample_size = ( + max_sample_size if max_sample_size is not None else sys.maxsize + ) + self.pad_audio = pad_audio + self.normalize = normalize + + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.label_processors is not None: + label 
= self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + def get_phone(self, index, label_idx): + label = self.phone_list[label_idx][index] + if self.phone_processors is not None: + label = self.phone_processors[label_idx](label) + return label + + def get_phones(self, index): + return [self.get_phone(index, i) for i in range(1)] + + + def load_feature(self, mix_name): + """ + Load audio feature + Returns: + audio_feats: numpy.ndarray of shape [T, F] + """ + def stacker(feats, stack_order): + """ + Concatenating consecutive audio frames + Args: + feats - numpy.ndarray of shape [T, F] + stack_order - int (number of neighboring frames to concatenate + Returns: + feats - numpy.ndarray of shape [T', F'] + """ + feat_dim = feats.shape[1] + if len(feats) % stack_order != 0: + res = stack_order - len(feats) % stack_order + res = np.zeros([res, feat_dim]).astype(feats.dtype) + feats = np.concatenate([feats, res], axis=0) + feats = feats.reshape((-1, stack_order, feat_dim)).reshape(-1, stack_order*feat_dim) + return feats + audio_fn = mix_name + + # sample_rate, wav_data = wavfile.read(audio_fn) + # assert sample_rate == 16_000 and len(wav_data.shape) == 1 + # audio_feats = logfbank(wav_data, samplerate=sample_rate).astype(np.float32) # [T, F] + audio_feats = kaldiio.load_mat(audio_fn).astype(np.float32) + + audio_feats = stacker(audio_feats, self.stack_order_audio) # [T/stack_order_audio, F*stack_order_audio] + return audio_feats + + + def __getitem__(self, index): + audio_feats = self.load_feature(self.names[index]) + audio_feats = torch.from_numpy(audio_feats.astype(np.float32)) + if self.normalize: + with torch.no_grad(): + audio_feats = F.layer_norm(audio_feats, audio_feats.shape[1:]) + labels = self.get_labels(index) + phone_sequence_list = self.get_phones(index) + + + return {"id": index, 'audio_source': audio_feats, "label_list": labels, "phone_sequence_list": phone_sequence_list} + + + def __len__(self): + return len(self.sizes) + + def crop_to_max_size(self, wav, target_size, start=None): + size = len(wav) + diff = size - target_size + if diff <= 0: + return wav, 0 + # longer utterances + if start is None: + start, end = 0, target_size + # if self.random_crop: + # start = np.random.randint(0, diff + 1) + # end = size - diff + start + else: + end = start + target_size + return wav[start:end], start + + def collater(self, samples): + samples = [s for s in samples if s["id"] is not None] + if len(samples) == 0: + return {} + + audio_source = [s["audio_source"] for s in samples] + if audio_source[0] is None: + audio_source = None + if audio_source is not None: + audio_sizes = [len(s) for s in audio_source] + + if self.pad_audio: + audio_size = min(max(audio_sizes), self.max_sample_size) + else: + audio_size = min(min(audio_sizes), self.max_sample_size) + if audio_source is not None: + collated_audios, padding_mask, audio_starts = self.collater_audio(audio_source, audio_size) + else: + collated_audios, audio_starts = None, None + + # B1, D1, T1 = collated_audios.size() + # collated_videos = torch.from_numpy(np.zeros((B1, 1, T1, 88, 88)).astype(np.float32)) + + targets_by_label = [ + [s["label_list"][i] for s in samples] + for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, audio_size, audio_starts + ) + + ############################################################################ + phone_sequence_list = 
[s["phone_sequence_list"] for s in samples] + if phone_sequence_list[0] is None: + phone_sequence_list = None + + targets_by_phone_label = [ + [s["phone_sequence_list"][i] for s in samples] + for i in range(self.num_labels) + ] + targets_phone_list, lengths_phone_list, ntokens_phone_list = self.collater_phone_label( + targets_by_phone_label, audio_size, audio_starts + ) + + # print("targets_phone_list", targets_phone_list) + ###################################################### + + # source = {"audio": collated_audios, "video": collated_videos} + source = {"audio": collated_audios, "video": None} + net_input = {"source": source, "padding_mask": padding_mask} + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + if self.is_s2s: + batch['target'], net_input['prev_output_tokens'] = targets_list[0][0], targets_list[0][1] + else: + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + + batch["targets_phone_list"] = targets_phone_list + + return batch + + def collater_audio(self, audios, audio_size, audio_starts=None): + audio_feat_shape = list(audios[0].shape[1:]) + collated_audios = audios[0].new_zeros([len(audios), audio_size]+audio_feat_shape) + padding_mask = ( + torch.BoolTensor(len(audios), audio_size).fill_(False) # + ) + start_known = audio_starts is not None + audio_starts = [0 for _ in audios] if not start_known else audio_starts + for i, audio in enumerate(audios): + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + elif diff < 0: + assert self.pad_audio + collated_audios[i] = torch.cat( + [audio, audio.new_full([-diff]+audio_feat_shape, 0.0)] + ) + padding_mask[i, diff:] = True + else: + collated_audios[i], audio_starts[i] = self.crop_to_max_size( + audio, audio_size, audio_starts[i] if start_known else None + ) + if len(audios[0].shape) == 2: + collated_audios = collated_audios.transpose(1, 2) # [B, T, F] -> [B, F, T] + else: + collated_audios = collated_audios.permute((0, 4, 1, 2, 3)).contiguous() # [B, T, H, W, C] -> [B, C, T, H, W] + return collated_audios, padding_mask, audio_starts + + def collater_frm_label( + self, targets, audio_size, audio_starts, label_rate, pad + ): + assert label_rate > 0 + s2f = label_rate / self.sample_rate # num label per sample + frm_starts = [int(round(s * s2f)) for s in audio_starts] + frm_size = int(round(audio_size * s2f)) + if not self.pad_audio: + rem_size = [len(t) - s for t, s in zip(targets, frm_starts)] + frm_size = min(frm_size, *rem_size) + targets = [t[s: s + frm_size] for t, s in zip(targets, frm_starts)] + logger.debug(f"audio_starts={audio_starts}") + logger.debug(f"frame_starts={frm_starts}") + logger.debug(f"frame_size={frm_size}") + + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens( + targets, pad_idx=pad, left_pad=False + ) + return targets, lengths, ntokens + + def collater_frm_phone_label( + self, targets, pad + ): + + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens( + targets, pad_idx=pad, left_pad=False + ) + return targets, lengths, ntokens + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets 
= data_utils.collate_tokens( + targets, pad_idx=pad, left_pad=False + ) + return targets, lengths, ntokens + + + def collater_seq_label_s2s(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + pad, eos = self.label_processors[0].dictionary.pad(), self.label_processors[0].dictionary.eos() + targets_ = data_utils.collate_tokens(targets, pad_idx=pad, eos_idx=eos, left_pad=False) + prev_output_tokens = data_utils.collate_tokens(targets, pad_idx=pad, eos_idx=eos, left_pad=False, move_eos_to_beginning=True) + return (targets_, prev_output_tokens), lengths, ntokens + + def collater_label(self, targets_by_label, audio_size, audio_starts): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + if label_rate == -1: + if self.is_s2s: + targets, lengths, ntokens = self.collater_seq_label_s2s(targets, pad) + else: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + else: + targets, lengths, ntokens = self.collater_frm_label( + targets, audio_size, audio_starts, label_rate, pad + ) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + + def collater_phone_label(self, targets_by_label, audio_size, audio_starts): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + targets, lengths, ntokens = self.collater_frm_phone_label( + targets, pad + ) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + if self.pad_audio: + return self.sizes[index] + return min(self.sizes[index], self.max_sample_size) + + def ordered_indices(self): + if self.shuffle: + order = [np.random.permutation(len(self))] + else: + order = [np.arange(len(self))] + + order.append(self.sizes) + return np.lexsort(order)[::-1] diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/data/onlyaudiohubert_dataset.py b/SpeechT5/VATLM/vat_hubert/vathubert/data/onlyaudiohubert_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..d864f7d822fb124efb357a7a136117418b131c99 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/data/onlyaudiohubert_dataset.py @@ -0,0 +1,436 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import itertools +import logging +import os +import sys +import time +from typing import Any, List, Optional, Union + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils +from fairseq.data.fairseq_dataset import FairseqDataset +from scipy.io import wavfile +import kaldiio + + +DBG=True if len(sys.argv) == 1 else False + +if DBG: + import utils as custom_utils + logging.basicConfig( + format="%(asctime)s 
| %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "DEBUG").upper(), + stream=sys.stdout, + ) +else: + from . import utils as custom_utils + +logger = logging.getLogger(__name__) + + +def load_audio(manifest_path, max_keep, min_keep, frame_rate, label_paths, label_rates, tol=0.1): + def is_audio_label_aligned(audio_dur, label_durs): + return all([abs(audio_dur - label_dur) < tol for label_dur in label_durs]) + + n_long, n_short = 0, 0 + names, inds, sizes = [], [], [] + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + sz = int(items[1]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + audio_path = items[0] + names.append(os.path.join(root, audio_path)) + inds.append(ind) + sizes.append(sz) + tot = ind + 1 + logger.info( + ( + f"label_rates={label_rates}, " + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, names, inds, tot, sizes + + + +def load_label(label_path, inds, tot): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds, tot): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + assert ( + len(code_lengths) == tot + ), f"number of labels does not match ({len(code_lengths)} != {tot})" + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + +def verify_label_lengths( + audio_sizes, + audio_rate, + label_path, + label_rate, + inds, + tot, + tol=0.1, # tolerance in seconds +): + if label_rate < 0: + logger.info(f"{label_path} is sequence label. skipped") + return + + with open(label_path) as f: + lengths = [len(line.rstrip().split()) for line in f] + assert len(lengths) == tot + lengths = [lengths[i] for i in inds] + num_invalid = 0 + for i, ind in enumerate(inds): + dur_from_audio = audio_sizes[i] / audio_rate + dur_from_label = lengths[i] / label_rate + if abs(dur_from_audio - dur_from_label) > tol: + logger.warning( + ( + f"audio and label duration differ too much " + f"(|{dur_from_audio} - {dur_from_label}| > {tol}) " + f"in line {ind+1} of {label_path}. Check if `label_rate` " + f"is correctly set (currently {label_rate}). " + f"num. 
of samples = {audio_sizes[i]}; " + f"label length = {lengths[i]}" + ) + ) + num_invalid += 1 + if num_invalid > 0: + logger.warning( + f"total {num_invalid} (audio, label) pairs with mismatched lengths" + ) + + +class OnlyAudioHubertDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + single_target: bool = False, + stack_order_audio: int=1, + skip_verify: bool=False, + is_s2s=False, + ): + self.label_rates = ( + [label_rates for _ in range(len(label_paths))] + if isinstance(label_rates, int) + else label_rates + ) + self.audio_root, self.names, inds, tot, self.sizes = load_audio(manifest_path, max_keep_sample_size, min_keep_sample_size, frame_rate=sample_rate, label_paths=label_paths, label_rates=self.label_rates) + self.sample_rate = sample_rate + self.stack_order_audio = stack_order_audio + self.shuffle = shuffle + + self.num_labels = len(label_paths) + self.pad_list = pad_list + self.eos_list = eos_list + self.label_processors = label_processors + self.single_target = single_target + self.store_labels = store_labels + self.is_s2s = is_s2s + + assert self.single_target == (self.label_rates[0] == -1), f"single target should be equivalent to sequence label (label_rate==-1)" + if store_labels: + self.label_list = [load_label(p, inds, tot) for p in label_paths] + + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds, tot) for p in label_paths + ] + assert ( + label_processors is None + or len(label_processors) == self.num_labels + ) + if not skip_verify: + for label_path, label_rate in zip(label_paths, self.label_rates): + verify_label_lengths(self.sizes, self.sample_rate, label_path, label_rate, inds, tot) + else: + logger.info(f"Skip label alignment verifying") + + self.max_sample_size = ( + max_sample_size if max_sample_size is not None else sys.maxsize + ) + self.pad_audio = pad_audio + self.normalize = normalize + + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + + def load_feature(self, mix_name): + """ + Load audio feature + Returns: + audio_feats: numpy.ndarray of shape [T, F] + """ + def stacker(feats, stack_order): + """ + Concatenating consecutive audio frames + Args: + feats - numpy.ndarray of shape [T, F] + stack_order - int (number of neighboring frames to concatenate + Returns: + feats - numpy.ndarray of shape [T', F'] + """ + feat_dim = feats.shape[1] + if len(feats) % stack_order != 0: + res = stack_order - len(feats) % stack_order + res = np.zeros([res, feat_dim]).astype(feats.dtype) + feats = np.concatenate([feats, res], axis=0) + feats = feats.reshape((-1, stack_order, feat_dim)).reshape(-1, stack_order*feat_dim) + return 
feats + audio_fn = mix_name + + audio_feats = kaldiio.load_mat(audio_fn).astype(np.float32) + audio_feats = stacker(audio_feats, self.stack_order_audio) # [T/stack_order_audio, F*stack_order_audio] + return audio_feats + + + def __getitem__(self, index): + audio_feats = self.load_feature(self.names[index]) + audio_feats = torch.from_numpy(audio_feats.astype(np.float32)) + if self.normalize: + with torch.no_grad(): + audio_feats = F.layer_norm(audio_feats, audio_feats.shape[1:]) + labels = self.get_labels(index) + + + return {"id": index, 'audio_source': audio_feats, "label_list": labels} + + + def __len__(self): + return len(self.sizes) + + def crop_to_max_size(self, wav, target_size, start=None): + size = len(wav) + diff = size - target_size + if diff <= 0: + return wav, 0 + # longer utterances + if start is None: + start, end = 0, target_size + # if self.random_crop: + # start = np.random.randint(0, diff + 1) + # end = size - diff + start + else: + end = start + target_size + return wav[start:end], start + + def collater(self, samples): + samples = [s for s in samples if s["id"] is not None] + if len(samples) == 0: + return {} + + audio_source = [s["audio_source"] for s in samples] + if audio_source[0] is None: + audio_source = None + if audio_source is not None: + audio_sizes = [len(s) for s in audio_source] + + if self.pad_audio: + audio_size = min(max(audio_sizes), self.max_sample_size) + else: + audio_size = min(min(audio_sizes), self.max_sample_size) + if audio_source is not None: + collated_audios, padding_mask, audio_starts = self.collater_audio(audio_source, audio_size) + else: + collated_audios, audio_starts = None, None + + targets_by_label = [ + [s["label_list"][i] for s in samples] + for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, audio_size, audio_starts + ) + + source = {"audio": collated_audios, "video": None} + net_input = {"source": source, "padding_mask": padding_mask} + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + if self.is_s2s: + batch['target'], net_input['prev_output_tokens'] = targets_list[0][0], targets_list[0][1] + else: + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + + return batch + + def collater_audio(self, audios, audio_size, audio_starts=None): + audio_feat_shape = list(audios[0].shape[1:]) + collated_audios = audios[0].new_zeros([len(audios), audio_size]+audio_feat_shape) + padding_mask = ( + torch.BoolTensor(len(audios), audio_size).fill_(False) # + ) + start_known = audio_starts is not None + audio_starts = [0 for _ in audios] if not start_known else audio_starts + for i, audio in enumerate(audios): + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + elif diff < 0: + assert self.pad_audio + collated_audios[i] = torch.cat( + [audio, audio.new_full([-diff]+audio_feat_shape, 0.0)] + ) + padding_mask[i, diff:] = True + else: + collated_audios[i], audio_starts[i] = self.crop_to_max_size( + audio, audio_size, audio_starts[i] if start_known else None + ) + if len(audios[0].shape) == 2: + collated_audios = collated_audios.transpose(1, 2) # [B, T, F] -> [B, F, T] + else: + collated_audios = collated_audios.permute((0, 4, 1, 2, 3)).contiguous() # [B, T, H, W, C] -> [B, C, T, H, W] + 
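(Aside: a standalone sketch, not part of the patch, of the batching behaviour that collater_audio above implements; the method's return statement follows directly below. The tensor shapes and values here are made up for illustration.)

import torch

# Two made-up [T, F] utterances of different length (F = 4 features).
feats = [torch.randn(5, 4), torch.randn(3, 4)]
audio_size = max(len(f) for f in feats)                  # pad_audio=True keeps the longest length
batch = feats[0].new_zeros([len(feats), audio_size] + [4])
padding_mask = torch.BoolTensor(len(feats), audio_size).fill_(False)
for i, f in enumerate(feats):
    batch[i, : len(f)] = f
    padding_mask[i, len(f):] = True                      # True marks zero-padded frames
batch = batch.transpose(1, 2)                            # [B, T, F] -> [B, F, T], as in the collater
print(batch.shape, padding_mask.sum(dim=1))              # torch.Size([2, 4, 5]) tensor([0, 2])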
return collated_audios, padding_mask, audio_starts + + def collater_frm_label( + self, targets, audio_size, audio_starts, label_rate, pad + ): + assert label_rate > 0 + s2f = label_rate / self.sample_rate # num label per sample + frm_starts = [int(round(s * s2f)) for s in audio_starts] + frm_size = int(round(audio_size * s2f)) + if not self.pad_audio: + rem_size = [len(t) - s for t, s in zip(targets, frm_starts)] + frm_size = min(frm_size, *rem_size) + targets = [t[s: s + frm_size] for t, s in zip(targets, frm_starts)] + logger.debug(f"audio_starts={audio_starts}") + logger.debug(f"frame_starts={frm_starts}") + logger.debug(f"frame_size={frm_size}") + + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens( + targets, pad_idx=pad, left_pad=False + ) + return targets, lengths, ntokens + + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens( + targets, pad_idx=pad, left_pad=False + ) + return targets, lengths, ntokens + + + def collater_seq_label_s2s(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + pad, eos = self.label_processors[0].dictionary.pad(), self.label_processors[0].dictionary.eos() + targets_ = data_utils.collate_tokens(targets, pad_idx=pad, eos_idx=eos, left_pad=False) + prev_output_tokens = data_utils.collate_tokens(targets, pad_idx=pad, eos_idx=eos, left_pad=False, move_eos_to_beginning=True) + return (targets_, prev_output_tokens), lengths, ntokens + + def collater_label(self, targets_by_label, audio_size, audio_starts): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + if label_rate == -1: + if self.is_s2s: + targets, lengths, ntokens = self.collater_seq_label_s2s(targets, pad) + else: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + else: + targets, lengths, ntokens = self.collater_frm_label( + targets, audio_size, audio_starts, label_rate, pad + ) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + if self.pad_audio: + return self.sizes[index] + return min(self.sizes[index], self.max_sample_size) + + def ordered_indices(self): + if self.shuffle: + order = [np.random.permutation(len(self))] + else: + order = [np.arange(len(self))] + + order.append(self.sizes) + return np.lexsort(order)[::-1] diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/data/texthubert_dataset.py b/SpeechT5/VATLM/vat_hubert/vathubert/data/texthubert_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..d5701df7e81f27e329b8b4e096bf39e6bca58798 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/data/texthubert_dataset.py @@ -0,0 +1,300 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# 
---------------------------------------------------------------------------- + +import itertools +import logging +import os +import sys +import time +from typing import Any, List, Optional, Union + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils +from fairseq.data.fairseq_dataset import FairseqDataset + +DBG=True if len(sys.argv) == 1 else False + +if DBG: + import utils as custom_utils + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "DEBUG").upper(), + stream=sys.stdout, + ) +else: + from . import utils as custom_utils + +logger = logging.getLogger(__name__) + + +def load_text(manifest_path, max_keep, min_keep, frame_rate, label_paths, label_rates, tol=0.1): + + n_long, n_short, n_unaligned = 0, 0, 0 + names, inds, sizes = [], [], [] + dur_from_label_list = [] + + with open(manifest_path) as f: + for ind, line in enumerate(f): + items = line.strip().split("\t") + frames = items[0] + sz = int(frames) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + else: + inds.append(ind) + sizes.append(sz) + + logger.info( + ( + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(inds)}, skipped {n_short} short and {n_long} long" + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + + return inds, sizes + + +def load_label(label_path, inds): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + labels = [labels[i] for i in inds] + return labels + +def load_phone_label(tsv, inds): + with open(tsv) as f: + labels = [line.rstrip() for line in f.readlines()] + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + + +class TextHubertDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + phone_sequence_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + single_target: bool = False, + stack_order_audio: int=1, + skip_verify: bool=False, + is_s2s=False, + ): + self.label_rates = ( + [label_rates for _ in range(len(label_paths))] + if isinstance(label_rates, int) + else label_rates + ) + inds, self.sizes = load_text(manifest_path, max_keep_sample_size, min_keep_sample_size, frame_rate=sample_rate, label_paths=label_paths, label_rates=self.label_rates) + self.sample_rate = sample_rate + self.stack_order_audio = stack_order_audio + self.shuffle = shuffle + + self.num_labels = len(label_paths) + self.pad_list = pad_list + self.eos_list = eos_list + self.label_processors = label_processors + self.phone_processors = phone_sequence_processors + self.single_target = single_target + self.store_labels = store_labels + self.is_s2s = is_s2s + + + if store_labels: + self.label_list = [load_label(p, inds) for p in label_paths] + 
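(Aside: a minimal standalone sketch of the byte-offset label loading used when store_labels=False, mirroring load_label_offset and get_label; the file name labels.km and its contents are hypothetical.)

import itertools

lines = ["10 10 42\n", "7 7 7 13\n", "99\n"]             # one target sequence per line
with open("labels.km", "w") as f:                        # hypothetical label file
    f.writelines(lines)

# Same bookkeeping as load_label_offset: cumulative byte lengths become (start, end) spans.
code_lengths = [len(l.encode("utf-8")) for l in lines]
offsets = list(itertools.accumulate([0] + code_lengths))
offsets = [(offsets[i], offsets[i + 1]) for i in range(len(lines))]

# Mirrors get_label when store_labels=False: read one label without loading the whole file.
with open("labels.km") as f:
    start, end = offsets[1]
    f.seek(start)
    print(repr(f.read(end - start)))                     # '7 7 7 13\n'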
self.phone_list = [load_phone_label(p, inds) for p in [manifest_path]] + + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds) for p in label_paths + ] + + self.max_sample_size = ( + max_sample_size if max_sample_size is not None else sys.maxsize + ) + self.pad_audio = pad_audio + self.normalize = normalize + + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + def get_phone(self, index, label_idx): + label = self.phone_list[label_idx][index] + if self.phone_processors is not None: + label = self.phone_processors[label_idx](label) + return label + + def get_phones(self, index): + return [self.get_phone(index, i) for i in range(1)] + + + def __getitem__(self, index): + labels = self.get_labels(index) + phone_sequence_list = self.get_phones(index) + + + return {"id": index, "label_list": labels, "phone_sequence_list": phone_sequence_list} + + + def __len__(self): + return len(self.sizes) + + + def collater(self, samples): + samples = [s for s in samples if s["id"] is not None] + if len(samples) == 0: + return {} + + targets_by_label = [ + [s["label_list"][i] for s in samples] + for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, + ) + + phone_sequence_list = [s["phone_sequence_list"] for s in samples] + if phone_sequence_list[0] is None: + phone_sequence_list = None + + targets_by_phone_label = [ + [s["phone_sequence_list"][i] for s in samples] + for i in range(self.num_labels) + ] + targets_phone_list, lengths_phone_list, ntokens_phone_list = self.collater_phone_label( + targets_by_phone_label, + ) + + net_input = {"source": None} + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + if self.is_s2s: + batch['target'], net_input['prev_output_tokens'] = targets_list[0][0], targets_list[0][1] + else: + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + + batch["extra_text_phone_list"] = targets_phone_list + + return batch + + def collater_frm_label( + self, targets, label_rate, pad + ): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens( + targets, pad_idx=pad, left_pad=False + ) + return targets, lengths, ntokens + + + def collater_frm_phone_label( + self, targets, pad + ): + + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens( + targets, pad_idx=pad, left_pad=False + ) + return targets, lengths, ntokens + + def collater_label(self, targets_by_label,): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + targets, lengths, ntokens = self.collater_frm_label( + targets, label_rate, pad + ) + targets_list.append(targets) + 
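(Aside: for reference, a small standalone example of what fairseq's data_utils.collate_tokens, called by the collater helpers in this loop, returns for variable-length label sequences; the token ids and pad_idx=1 are made up.)

import torch
from fairseq.data import data_utils

targets = [torch.LongTensor([5, 6, 7]), torch.LongTensor([8, 9])]   # made-up label ids
lengths = torch.LongTensor([len(t) for t in targets])               # tensor([3, 2])
ntokens = lengths.sum().item()                                      # 5
batch = data_utils.collate_tokens(targets, pad_idx=1, left_pad=False)
print(batch)
# tensor([[5, 6, 7],
#         [8, 9, 1]])  <- the shorter sequence is right-padded with pad_idx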
lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + + def collater_phone_label(self, targets_by_label): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + targets, lengths, ntokens = self.collater_frm_phone_label( + targets, pad + ) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + if self.pad_audio: + return self.sizes[index] + return min(self.sizes[index], self.max_sample_size) + + def ordered_indices(self): + if self.shuffle: + order = [np.random.permutation(len(self))] + else: + order = [np.arange(len(self))] + + order.append(self.sizes) + return np.lexsort(order)[::-1] diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/data/utils.py b/SpeechT5/VATLM/vat_hubert/vathubert/data/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..666500334e3223273e1df67125796116a429c3c9 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/data/utils.py @@ -0,0 +1,300 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- +import cv2 +import torch +import random +import numpy as np +from typing import Dict, List, Optional, Tuple + +def load_video(path): + for i in range(3): + try: + cap = cv2.VideoCapture(path) + frames = [] + while True: + ret, frame = cap.read() + if ret: + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + frames.append(frame) + else: + break + frames = np.stack(frames) + return frames + except Exception: + print(f"failed loading {path} ({i} / 3)") + if i == 2: + raise ValueError(f"Unable to load {path}") + + +class Compose(object): + """Compose several preprocess together. + Args: + preprocess (list of ``Preprocess`` objects): list of preprocess to compose. + """ + + def __init__(self, preprocess): + self.preprocess = preprocess + + def __call__(self, sample): + for t in self.preprocess: + sample = t(sample) + return sample + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.preprocess: + format_string += '\n' + format_string += ' {0}'.format(t) + format_string += '\n)' + return format_string + + +class Normalize(object): + """Normalize a ndarray image with mean and standard deviation. + """ + + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, frames): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + Returns: + Tensor: Normalized Tensor image. 
+ """ + frames = (frames - self.mean) / self.std + return frames + + def __repr__(self): + return self.__class__.__name__+'(mean={0}, std={1})'.format(self.mean, self.std) + +class CenterCrop(object): + """Crop the given image at the center + """ + def __init__(self, size): + self.size = size + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be cropped. + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + th, tw = self.size + delta_w = int(round((w - tw))/2.) + delta_h = int(round((h - th))/2.) + frames = frames[:, delta_h:delta_h+th, delta_w:delta_w+tw] + return frames + + +class RandomCrop(object): + """Crop the given image at the center + """ + + def __init__(self, size): + self.size = size + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be cropped. + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + th, tw = self.size + delta_w = random.randint(0, w-tw) + delta_h = random.randint(0, h-th) + frames = frames[:, delta_h:delta_h+th, delta_w:delta_w+tw] + return frames + + def __repr__(self): + return self.__class__.__name__ + '(size={0})'.format(self.size) + +class HorizontalFlip(object): + """Flip image horizontally. + """ + + def __init__(self, flip_ratio): + self.flip_ratio = flip_ratio + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be flipped with a probability flip_ratio + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + if random.random() < self.flip_ratio: + for index in range(t): + frames[index] = cv2.flip(frames[index], 1) + return frames + +def compute_mask_indices( + shape: Tuple[int, int], + padding_mask: Optional[torch.Tensor], + mask_prob: float, + mask_length: int, + mask_type: str = "static", + mask_other: float = 0.0, + min_masks: int = 0, + no_overlap: bool = False, + min_space: int = 0, +) -> np.ndarray: + """ + Computes random mask spans for a given shape + Args: + shape: the the shape for which to compute masks. + should be of size 2 where first element is batch size and 2nd is timesteps + padding_mask: optional padding mask of the same size as shape, which will prevent masking padded elements + mask_prob: probability for each token to be chosen as start of the span to be masked. this will be multiplied by + number of timesteps divided by length of mask span to mask approximately this percentage of all elements. + however due to overlaps, the actual number will be smaller (unless no_overlap is True) + mask_type: how to compute mask lengths + static = fixed size + uniform = sample from uniform distribution [mask_other, mask_length*2] + normal = sample from normal distribution with mean mask_length and stdev mask_other. 
mask is min 1 element + poisson = sample from possion distribution with lambda = mask length + min_masks: minimum number of masked spans + no_overlap: if false, will switch to an alternative recursive algorithm that prevents spans from overlapping + min_space: only used if no_overlap is True, this is how many elements to keep unmasked between spans + """ + + bsz, all_sz = shape + mask = np.full((bsz, all_sz), False) + + all_num_mask = int( + # add a random number for probabilistic rounding + mask_prob * all_sz / float(mask_length) + + np.random.rand() + ) + + all_num_mask = max(min_masks, all_num_mask) + + mask_idcs = [] + for i in range(bsz): + if padding_mask is not None: + sz = all_sz - padding_mask[i].long().sum().item() + num_mask = int( + # add a random number for probabilistic rounding + mask_prob * sz / float(mask_length) + + np.random.rand() + ) + num_mask = max(min_masks, num_mask) + else: + sz = all_sz + num_mask = all_num_mask + + if mask_type == "static": + lengths = np.full(num_mask, mask_length) + elif mask_type == "uniform": + lengths = np.random.randint(mask_other, mask_length * 2 + 1, size=num_mask) + elif mask_type == "normal": + lengths = np.random.normal(mask_length, mask_other, size=num_mask) + lengths = [max(1, int(round(x))) for x in lengths] + elif mask_type == "poisson": + lengths = np.random.poisson(mask_length, size=num_mask) + lengths = [int(round(x)) for x in lengths] + else: + raise Exception("unknown mask selection " + mask_type) + + if sum(lengths) == 0: + lengths[0] = min(mask_length, sz - 1) + + if no_overlap: + mask_idc = [] + + def arrange(s, e, length, keep_length): + span_start = np.random.randint(s, e - length) + mask_idc.extend(span_start + i for i in range(length)) + + new_parts = [] + if span_start - s - min_space >= keep_length: + new_parts.append((s, span_start - min_space + 1)) + if e - span_start - keep_length - min_space > keep_length: + new_parts.append((span_start + length + min_space, e)) + return new_parts + + parts = [(0, sz)] + min_length = min(lengths) + for length in sorted(lengths, reverse=True): + lens = np.fromiter( + (e - s if e - s >= length + min_space else 0 for s, e in parts), + np.int, + ) + l_sum = np.sum(lens) + if l_sum == 0: + break + probs = lens / np.sum(lens) + c = np.random.choice(len(parts), p=probs) + s, e = parts.pop(c) + parts.extend(arrange(s, e, length, min_length)) + mask_idc = np.asarray(mask_idc) + else: + min_len = min(lengths) + if sz - min_len <= num_mask: + min_len = sz - num_mask - 1 + + mask_idc = np.random.choice(sz - min_len, num_mask, replace=False) + + mask_idc = np.asarray( + [ + mask_idc[j] + offset + for j in range(len(mask_idc)) + for offset in range(lengths[j]) + ] + ) + + mask_idcs.append(np.unique(mask_idc[mask_idc < sz])) + + min_len = min([len(m) for m in mask_idcs]) + batch_indexes, starts, ends = [], [], [] + for i, mask_idc in enumerate(mask_idcs): + if len(mask_idc) > min_len: + mask_idc = np.random.choice(mask_idc, min_len, replace=False) + mask[i, mask_idc] = True + vals, run_starts, run_lengths = find_runs(mask[i]) + start_indices, lengths = run_starts[vals == True], run_lengths[vals == True] + starts.append(start_indices) + ends.append(start_indices+lengths) + batch_indexes.append(np.zeros([len(start_indices)])+i) + return mask, np.concatenate(starts).astype(np.int64), np.concatenate(ends).astype(np.int64), np.concatenate(batch_indexes).astype(np.int64) + +def find_runs(x): + """Find runs of consecutive items in an array.""" + + # ensure array + x = np.asanyarray(x) + if x.ndim != 
1: + raise ValueError('only 1D array supported') + n = x.shape[0] + + # handle empty array + if n == 0: + return np.array([]), np.array([]), np.array([]) + + else: + # find run starts + loc_run_start = np.empty(n, dtype=bool) + loc_run_start[0] = True + np.not_equal(x[:-1], x[1:], out=loc_run_start[1:]) + run_starts = np.nonzero(loc_run_start)[0] + + # find run values + run_values = x[loc_run_start] + + # find run lengths + run_lengths = np.diff(np.append(run_starts, n)) + + return run_values, run_starts, run_lengths diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/data/vathubert_dataset.py b/SpeechT5/VATLM/vat_hubert/vathubert/data/vathubert_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..f1cf0939ae46661508914028e29e522b09f9afe2 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/data/vathubert_dataset.py @@ -0,0 +1,530 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import itertools +import logging +import os +import sys +import time +from typing import Any, List, Optional, Union + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils +from fairseq.data.fairseq_dataset import FairseqDataset +from scipy.io import wavfile +import kaldiio + +DBG=True if len(sys.argv) == 1 else False + +if DBG: + import utils as custom_utils + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "DEBUG").upper(), + stream=sys.stdout, + ) +else: + from . 
import utils as custom_utils + +logger = logging.getLogger(__name__) + + +def load_audio_visual(manifest_path, max_keep, min_keep, frame_rate, label_paths, label_rates, tol=0.1): + def is_audio_label_aligned(audio_dur, label_durs): + return all([abs(audio_dur - label_dur) < tol for label_dur in label_durs]) + + n_long, n_short, n_unaligned = 0, 0, 0 + names, inds, sizes = [], [], [] + dur_from_label_list = [] + is_seq_label = any([x == -1 for x in label_rates]) + for label_path, label_rate in zip(label_paths, label_rates): + label_lengths = [len(line.rstrip().split()) / label_rate for line in open(label_path).readlines()] + dur_from_label_list.append(label_lengths) + dur_from_label_list = list(zip(*dur_from_label_list)) + + with open(manifest_path) as f: + root = f.readline().strip() + for ind, line in enumerate(f): + items = line.strip().split("\t") + sz = int(items[-2]) + if min_keep is not None and sz < min_keep: + n_short += 1 + elif max_keep is not None and sz > max_keep: + n_long += 1 + elif (not is_seq_label) and (not is_audio_label_aligned(sz/frame_rate, dur_from_label_list[ind])): + n_unaligned += 1 + else: + video_path = items[1] + audio_path = items[2] + audio_id = items[0] + names.append((video_path, audio_path+','+audio_id)) + inds.append(ind) + sizes.append(sz) + tot = ind + 1 + logger.info( + ( + f"label_rates={label_rates}, " + f"max_keep={max_keep}, min_keep={min_keep}, " + f"loaded {len(names)}, skipped {n_short} short and {n_long} long and {n_unaligned} unaligned, " + f"longest-loaded={max(sizes)}, shortest-loaded={min(sizes)}" + ) + ) + return root, names, inds, tot, sizes + +def load_label(label_path, inds, tot): + with open(label_path) as f: + labels = [line.rstrip() for line in f] + assert ( + len(labels) == tot + ), f"number of labels does not match ({len(labels)} != {tot})" + labels = [labels[i] for i in inds] + return labels + + +def load_label_offset(label_path, inds, tot): + with open(label_path) as f: + code_lengths = [len(line.encode("utf-8")) for line in f] + assert ( + len(code_lengths) == tot + ), f"number of labels does not match ({len(code_lengths)} != {tot})" + offsets = list(itertools.accumulate([0] + code_lengths)) + offsets = [(offsets[i], offsets[i + 1]) for i in inds] + return offsets + + +def verify_label_lengths( + audio_sizes, + audio_rate, + label_path, + label_rate, + inds, + tot, + tol=0.1, # tolerance in seconds +): + if label_rate < 0: + logger.info(f"{label_path} is sequence label. skipped") + return + + with open(label_path) as f: + lengths = [len(line.rstrip().split()) for line in f] + assert len(lengths) == tot + lengths = [lengths[i] for i in inds] + num_invalid = 0 + for i, ind in enumerate(inds): + dur_from_audio = audio_sizes[i] / audio_rate + dur_from_label = lengths[i] / label_rate + if abs(dur_from_audio - dur_from_label) > tol: + logger.warning( + ( + f"audio and label duration differ too much " + f"(|{dur_from_audio} - {dur_from_label}| > {tol}) " + f"in line {ind+1} of {label_path}. Check if `label_rate` " + f"is correctly set (currently {label_rate}). " + f"num. 

of samples = {audio_sizes[i]}; " + f"label length = {lengths[i]}" + ) + ) + num_invalid += 1 + if num_invalid > 0: + logger.warning( + f"total {num_invalid} (audio, label) pairs with mismatched lengths" + ) + + +class VATHubertDataset(FairseqDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + random_crop: bool = False, + single_target: bool = False, + stack_order_audio: int=1, + skip_verify: bool=False, + image_mean: float=0, + image_std: float=1, + image_crop_size: int=88, + image_aug: bool=False, + modalities: Optional[List[str]]=None, + is_s2s=False, + noise_fn=None, + noise_prob=0, + noise_snr=0, + noise_num=1 + ): + self.label_rates = ( + [label_rates for _ in range(len(label_paths))] + if isinstance(label_rates, int) + else label_rates + ) + self.modalities = set(modalities) + self.audio_root, self.names, inds, tot, self.sizes = load_audio_visual(manifest_path, max_keep_sample_size, min_keep_sample_size, frame_rate=sample_rate, label_paths=label_paths, label_rates=self.label_rates) + self.sample_rate = sample_rate + self.stack_order_audio = stack_order_audio + self.shuffle = shuffle + self.random_crop = random_crop + + self.num_labels = len(label_paths) + self.pad_list = pad_list + self.eos_list = eos_list + self.label_processors = label_processors + self.single_target = single_target + self.store_labels = store_labels + self.is_s2s = is_s2s + self.noise_wav, self.noise_prob, self.noise_snr, self.noise_num = [ln.strip() for ln in open(noise_fn).readlines()] if noise_fn is not None else [], noise_prob, noise_snr, noise_num + + assert self.single_target == (self.label_rates[0] == -1), f"single target should be equivalent to sequence label (label_rate==-1)" + if store_labels: + self.label_list = [load_label(p, inds, tot) for p in label_paths] + else: + self.label_paths = label_paths + self.label_offsets_list = [ + load_label_offset(p, inds, tot) for p in label_paths + ] + assert ( + label_processors is None + or len(label_processors) == self.num_labels + ) + if not skip_verify: + for label_path, label_rate in zip(label_paths, self.label_rates): + verify_label_lengths(self.sizes, self.sample_rate, label_path, label_rate, inds, tot) + else: + logger.info(f"Skip label alignment verifying") + + self.max_sample_size = ( + max_sample_size if max_sample_size is not None else sys.maxsize + ) + self.pad_audio = pad_audio + self.normalize = normalize + if image_aug: + self.transform = custom_utils.Compose([ + custom_utils.Normalize( 0.0,255.0 ), + custom_utils.RandomCrop((image_crop_size, image_crop_size)), + custom_utils.HorizontalFlip(0.5), + custom_utils.Normalize(image_mean, image_std) ]) + else: + self.transform = custom_utils.Compose([ + custom_utils.Normalize( 0.0,255.0 ), + custom_utils.CenterCrop((image_crop_size, image_crop_size)), + custom_utils.Normalize(image_mean, image_std) ]) + logger.info(f"image transform: {self.transform}") + + logger.info( + f"pad_audio={pad_audio}, random_crop={random_crop}, " + f"normalize={normalize}, max_sample_size={self.max_sample_size}, " + f"seqs2seq data={self.is_s2s},") + logger.info( + f"Noise wav: 
{noise_fn}->{len(self.noise_wav)} wav, Prob: {self.noise_prob}, SNR: {self.noise_snr}, Number of mixture: {self.noise_num}" + ) + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def get_labels(self, index): + return [self.get_label(index, i) for i in range(self.num_labels)] + + def load_feature(self, mix_name): + """ + Load image and audio feature + Returns: + video_feats: numpy.ndarray of shape [T, H, W, 1], audio_feats: numpy.ndarray of shape [T, F] + """ + def stacker(feats, stack_order): + """ + Concatenating consecutive audio frames + Args: + feats - numpy.ndarray of shape [T, F] + stack_order - int (number of neighboring frames to concatenate + Returns: + feats - numpy.ndarray of shape [T', F'] + """ + feat_dim = feats.shape[1] + if len(feats) % stack_order != 0: + res = stack_order - len(feats) % stack_order + res = np.zeros([res, feat_dim]).astype(feats.dtype) + feats = np.concatenate([feats, res], axis=0) + feats = feats.reshape((-1, stack_order, feat_dim)).reshape(-1, stack_order*feat_dim) + return feats + video_fn, audio_fn = mix_name + if 'video' in self.modalities: + video_feats = self.load_video(video_fn) # [T, H, W, 1] + else: + video_feats = None + if 'audio' in self.modalities: + audio_fn = audio_fn.split(',')[0] + audio_feats = kaldiio.load_mat(audio_fn).astype(np.float32) + + audio_feats = stacker(audio_feats, self.stack_order_audio) # [T/stack_order_audio, F*stack_order_audio] + else: + audio_feats = None + if audio_feats is not None and video_feats is not None: + diff = len(audio_feats) - len(video_feats) + if diff < 0: + audio_feats = np.concatenate([audio_feats, np.zeros([-diff, audio_feats.shape[-1]], dtype=audio_feats.dtype)]) + elif diff > 0: + audio_feats = audio_feats[:-diff] + return video_feats, audio_feats + + def load_video(self, audio_name): + feats = custom_utils.load_video(os.path.join(self.audio_root, audio_name)) + feats = self.transform(feats) + feats = np.expand_dims(feats, axis=-1) + return feats + + def select_noise(self): + rand_indexes = np.random.randint(0, len(self.noise_wav), size=self.noise_num) + noise_wav = [] + for x in rand_indexes: + noise_wav.append(wavfile.read(self.noise_wav[x])[1].astype(np.float32)) + if self.noise_num == 1: + return noise_wav[0] + else: + min_len = min([len(x) for x in noise_wav]) + noise_wav = [x[:min_len] for x in noise_wav] + noise_wav = np.floor(np.stack(noise_wav).mean(axis=0)) + return noise_wav + + def add_noise(self, clean_wav): + clean_wav = clean_wav.astype(np.float32) + noise_wav = self.select_noise() + if type(self.noise_snr) == int or type(self.noise_snr) == float: + snr = self.noise_snr + elif type(self.noise_snr) == tuple: + snr = np.random.randint(self.noise_snr[0], self.noise_snr[1]+1) + clean_rms = np.sqrt(np.mean(np.square(clean_wav), axis=-1)) + if len(clean_wav) > len(noise_wav): + ratio = int(np.ceil(len(clean_wav)/len(noise_wav))) + noise_wav = np.concatenate([noise_wav for _ in range(ratio)]) + if len(clean_wav) < len(noise_wav): + start = 0 + noise_wav = noise_wav[start: start + len(clean_wav)] + noise_rms = np.sqrt(np.mean(np.square(noise_wav), axis=-1)) + adjusted_noise_rms = clean_rms / (10**(snr/20)) + adjusted_noise_wav = noise_wav * 
(adjusted_noise_rms / noise_rms) + mixed = clean_wav + adjusted_noise_wav + + #Avoid clipping noise + max_int16 = np.iinfo(np.int16).max + min_int16 = np.iinfo(np.int16).min + if mixed.max(axis=0) > max_int16 or mixed.min(axis=0) < min_int16: + if mixed.max(axis=0) >= abs(mixed.min(axis=0)): + reduction_rate = max_int16 / mixed.max(axis=0) + else : + reduction_rate = min_int16 / mixed.min(axis=0) + mixed = mixed * (reduction_rate) + mixed = mixed.astype(np.int16) + return mixed + + def __getitem__(self, index): + video_feats, audio_feats = self.load_feature(self.names[index]) + audio_feats, video_feats = torch.from_numpy(audio_feats.astype(np.float32)) if audio_feats is not None else None, torch.from_numpy(video_feats.astype(np.float32)) if video_feats is not None else None + if self.normalize and 'audio' in self.modalities: + with torch.no_grad(): + audio_feats = F.layer_norm(audio_feats, audio_feats.shape[1:]) + labels = self.get_labels(index) + fid = self.names[index][1].split(':')[1] + return {"id": index, 'fid': fid, "video_source": video_feats, 'audio_source': audio_feats, "label_list": labels} + + def __len__(self): + return len(self.sizes) + + def crop_to_max_size(self, wav, target_size, start=None): + size = len(wav) + diff = size - target_size + if diff <= 0: + return wav, 0 + # longer utterances + if start is None: + start, end = 0, target_size + if self.random_crop: + start = np.random.randint(0, diff + 1) + end = size - diff + start + else: + end = start + target_size + return wav[start:end], start + + def collater(self, samples): + samples = [s for s in samples if s["id"] is not None] + if len(samples) == 0: + return {} + + audio_source, video_source = [s["audio_source"] for s in samples], [s["video_source"] for s in samples] + if audio_source[0] is None: + audio_source = None + if video_source[0] is None: + video_source = None + if audio_source is not None: + audio_sizes = [len(s) for s in audio_source] + else: + audio_sizes = [len(s) for s in video_source] + if self.pad_audio: + audio_size = min(max(audio_sizes), self.max_sample_size) + else: + audio_size = min(min(audio_sizes), self.max_sample_size) + if audio_source is not None: + collated_audios, padding_mask, audio_starts = self.collater_audio(audio_source, audio_size) + else: + collated_audios, audio_starts = None, None + if video_source is not None: + collated_videos, padding_mask, audio_starts = self.collater_audio(video_source, audio_size, audio_starts) + else: + collated_videos = None + targets_by_label = [ + [s["label_list"][i] for s in samples] + for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, audio_size, audio_starts + ) + source = {"audio": collated_audios, "video": collated_videos} + net_input = {"source": source, "padding_mask": padding_mask} + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + "utt_id": [s['fid'] for s in samples] + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + if self.is_s2s: + batch['target'], net_input['prev_output_tokens'] = targets_list[0][0], targets_list[0][1] + else: + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + return batch + + def collater_audio(self, audios, audio_size, audio_starts=None): + audio_feat_shape = list(audios[0].shape[1:]) + collated_audios = 
audios[0].new_zeros([len(audios), audio_size]+audio_feat_shape) + padding_mask = ( + torch.BoolTensor(len(audios), audio_size).fill_(False) # + ) + start_known = audio_starts is not None + audio_starts = [0 for _ in audios] if not start_known else audio_starts + for i, audio in enumerate(audios): + diff = len(audio) - audio_size + if diff == 0: + collated_audios[i] = audio + elif diff < 0: + assert self.pad_audio + collated_audios[i] = torch.cat( + [audio, audio.new_full([-diff]+audio_feat_shape, 0.0)] + ) + padding_mask[i, diff:] = True + else: + collated_audios[i], audio_starts[i] = self.crop_to_max_size( + audio, audio_size, audio_starts[i] if start_known else None + ) + if len(audios[0].shape) == 2: + collated_audios = collated_audios.transpose(1, 2) # [B, T, F] -> [B, F, T] + else: + collated_audios = collated_audios.permute((0, 4, 1, 2, 3)).contiguous() # [B, T, H, W, C] -> [B, C, T, H, W] + return collated_audios, padding_mask, audio_starts + + def collater_frm_label( + self, targets, audio_size, audio_starts, label_rate, pad + ): + assert label_rate > 0 + s2f = label_rate / self.sample_rate # num label per sample + frm_starts = [int(round(s * s2f)) for s in audio_starts] + frm_size = int(round(audio_size * s2f)) + if not self.pad_audio: + rem_size = [len(t) - s for t, s in zip(targets, frm_starts)] + frm_size = min(frm_size, *rem_size) + targets = [t[s: s + frm_size] for t, s in zip(targets, frm_starts)] + logger.debug(f"audio_starts={audio_starts}") + logger.debug(f"frame_starts={frm_starts}") + logger.debug(f"frame_size={frm_size}") + + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens( + targets, pad_idx=pad, left_pad=False + ) + return targets, lengths, ntokens + + def collater_seq_label(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + targets = data_utils.collate_tokens( + targets, pad_idx=pad, left_pad=False + ) + return targets, lengths, ntokens + + def collater_seq_label_s2s(self, targets, pad): + lengths = torch.LongTensor([len(t) for t in targets]) + ntokens = lengths.sum().item() + pad, eos = self.label_processors[0].dictionary.pad(), self.label_processors[0].dictionary.eos() + targets_ = data_utils.collate_tokens(targets, pad_idx=pad, eos_idx=eos, left_pad=False) + prev_output_tokens = data_utils.collate_tokens(targets, pad_idx=pad, eos_idx=eos, left_pad=False, move_eos_to_beginning=True) + return (targets_, prev_output_tokens), lengths, ntokens + + def collater_label(self, targets_by_label, audio_size, audio_starts): + targets_list, lengths_list, ntokens_list = [], [], [] + itr = zip(targets_by_label, self.label_rates, self.pad_list) + for targets, label_rate, pad in itr: + if label_rate == -1: + if self.is_s2s: + targets, lengths, ntokens = self.collater_seq_label_s2s(targets, pad) + else: + targets, lengths, ntokens = self.collater_seq_label(targets, pad) + else: + targets, lengths, ntokens = self.collater_frm_label( + targets, audio_size, audio_starts, label_rate, pad + ) + targets_list.append(targets) + lengths_list.append(lengths) + ntokens_list.append(ntokens) + return targets_list, lengths_list, ntokens_list + + def num_tokens(self, index): + return self.size(index) + + def size(self, index): + if self.pad_audio: + return self.sizes[index] + return min(self.sizes[index], self.max_sample_size) + + def ordered_indices(self): + if self.shuffle: + order = [np.random.permutation(len(self))] + else: + order = [np.arange(len(self))] 
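(Aside: the np.lexsort idiom that ordered_indices completes just below can be illustrated in isolation; the sizes here are made up. lexsort sorts by its last key, so indices come out ordered by utterance size, the random permutation only breaks ties, and the [::-1] puts the longest utterances first.)

import numpy as np

sizes = np.array([120, 480, 480, 60])            # made-up utterance sizes
order = [np.random.permutation(len(sizes))]      # random tie-breaker (shuffle=True case)
order.append(sizes)                              # last key is the primary sort key
idx = np.lexsort(order)[::-1]                    # descending by size, ties shuffled
print(sizes[idx])                                # e.g. [480 480 120  60]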
+ + order.append(self.sizes) + return np.lexsort(order)[::-1] diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/decode_avhubert_lrs3.sh b/SpeechT5/VATLM/vat_hubert/vathubert/decode_avhubert_lrs3.sh new file mode 100644 index 0000000000000000000000000000000000000000..867fb7a0857ac66f8c738b93aa75495a70a03cc6 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/decode_avhubert_lrs3.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +decode_path=/path/to/finetuned_model +finetuned_model=checkpoint_best.pt +beam=50 +data=$1 +[ -z $data ] && data="test" + +python -B infer_s2s.py --config-dir /path/to/vat_hubert/vathubert/conf/ --config-name s2s_decode.yaml \ + dataset.gen_subset=${data} common_eval.path=${decode_path}/checkpoints/${finetuned_model} \ + common_eval.results_path=${decode_path}/${finetuned_model}_${data}_video_beam${beam} \ + override.modalities=["video"] \ + common.user_dir=/path/to/vat_hubert/vathubert \ + override.data=/path/to/data \ + override.label_dir=/path/to/data \ + generation.beam=${beam} + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/infer_s2s.py b/SpeechT5/VATLM/vat_hubert/vathubert/infer_s2s.py new file mode 100644 index 0000000000000000000000000000000000000000..d86d5ec0bc96a404a004684bdc042e3ca0fdceec --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/infer_s2s.py @@ -0,0 +1,321 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import ast +from itertools import chain +import logging +import math +import os +import sys +import json +import hashlib +import editdistance +from argparse import Namespace + +import numpy as np +import torch +from fairseq import checkpoint_utils, options, tasks, utils, distributed_utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.logging import progress_bar +from fairseq.logging.meters import StopwatchMeter, TimeMeter +from fairseq.models import FairseqLanguageModel +from omegaconf import DictConfig + +from pathlib import Path +import hydra +from hydra.core.config_store import ConfigStore +from fairseq.dataclass.configs import ( + CheckpointConfig, + CommonConfig, + CommonEvalConfig, + DatasetConfig, + DistributedTrainingConfig, + GenerationConfig, + FairseqDataclass, +) +from dataclasses import dataclass, field, is_dataclass +from typing import Any, Dict, List, Optional, Tuple, Union +from omegaconf import OmegaConf + +logging.root.setLevel(logging.INFO) +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +config_path = Path(__file__).resolve().parent / "conf" + +@dataclass +class OverrideConfig(FairseqDataclass): + noise_wav: Optional[str] = field(default=None, metadata={'help': 'noise wav file'}) + noise_prob: float = field(default=0, metadata={'help': 'noise probability'}) + noise_snr: float = field(default=0, metadata={'help': 'noise SNR in audio'}) + modalities: List[str] = field(default_factory=lambda: [""], metadata={'help': 'which modality to use'}) + data: Optional[str] = field(default=None, metadata={'help': 'path to test data directory'}) + label_dir: 
Optional[str] = field(default=None, metadata={'help': 'path to test label directory'}) + +@dataclass +class InferConfig(FairseqDataclass): + task: Any = None + generation: GenerationConfig = GenerationConfig() + common: CommonConfig = CommonConfig() + common_eval: CommonEvalConfig = CommonEvalConfig() + checkpoint: CheckpointConfig = CheckpointConfig() + distributed_training: DistributedTrainingConfig = DistributedTrainingConfig() + dataset: DatasetConfig = DatasetConfig() + override: OverrideConfig = OverrideConfig() + is_ax: bool = field( + default=False, + metadata={ + "help": "if true, assumes we are using ax for tuning and returns a tuple for ax to consume" + }, + ) + + +def main(cfg: DictConfig): + + if isinstance(cfg, Namespace): + cfg = convert_namespace_to_omegaconf(cfg) + + assert cfg.common_eval.path is not None, "--path required for recognition!" + assert ( + not cfg.generation.sampling or cfg.generation.nbest == cfg.generation.beam + ), "--sampling requires --nbest to be equal to --beam" + + if cfg.common_eval.results_path is not None: + os.makedirs(cfg.common_eval.results_path, exist_ok=True) + output_path = os.path.join(cfg.common_eval.results_path, "decode.log") + with open(output_path, "w", buffering=1, encoding="utf-8") as h: + return _main(cfg, h) + return _main(cfg, sys.stdout) + + +def get_symbols_to_strip_from_output(generator): + if hasattr(generator, "symbols_to_strip_from_output"): + return generator.symbols_to_strip_from_output + else: + return {generator.eos, generator.pad} + +def _main(cfg, output_file): + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=output_file, + ) + logger = logging.getLogger("hybrid.speech_recognize") + if output_file is not sys.stdout: # also print to stdout + logger.addHandler(logging.StreamHandler(sys.stdout)) + + utils.import_user_module(cfg.common) + models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([cfg.common_eval.path]) + models = [model.eval().cuda() for model in models] + saved_cfg.task.modalities = cfg.override.modalities + task = tasks.setup_task(saved_cfg.task) + + task.build_tokenizer(saved_cfg.tokenizer) + task.build_bpe(saved_cfg.bpe) + + logger.info(cfg) + + # Fix seed for stochastic decoding + if cfg.common.seed is not None and not cfg.generation.no_seed_provided: + np.random.seed(cfg.common.seed) + utils.set_torch_seed(cfg.common.seed) + + use_cuda = torch.cuda.is_available() + + # Set dictionary + dictionary = task.target_dictionary + + # loading the dataset should happen after the checkpoint has been loaded so we can give it the saved task config + task.cfg.noise_prob = cfg.override.noise_prob + task.cfg.noise_snr = cfg.override.noise_snr + task.cfg.noise_wav = cfg.override.noise_wav + if cfg.override.data is not None: + task.cfg.data = cfg.override.data + if cfg.override.label_dir is not None: + task.cfg.label_dir = cfg.override.label_dir + task.load_dataset(cfg.dataset.gen_subset, task_cfg=saved_cfg.task) + + lms = [None] + + # Optimize ensemble for generation + for model in chain(models, lms): + if model is None: + continue + if cfg.common.fp16: + model.half() + if use_cuda and not cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(cfg) + + # Load dataset (possibly sharded) + itr = task.get_batch_iterator( + dataset=task.dataset(cfg.dataset.gen_subset), + max_tokens=cfg.dataset.max_tokens, + 
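(Aside: skipping ahead in this file, _main scores the generated hypotheses with editdistance; the WER it writes out can be reproduced in isolation as below. The sentences are invented.)

import editdistance

refs = ["the cat sat on the mat", "hello world"]         # invented references
hyps = ["the cat sat on a mat", "hello word"]            # invented hypotheses

n_err, n_total = 0, 0
for hypo, ref in zip(hyps, refs):
    hypo, ref = hypo.strip().split(), ref.strip().split()
    n_err += editdistance.eval(hypo, ref)                # word-level edit distance
    n_total += len(ref)
print(f"WER: {100 * n_err / n_total:.2f}%")              # 2 / 8 -> 25.00%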
max_sentences=cfg.dataset.batch_size, + max_positions=utils.resolve_max_positions( + task.max_positions(), *[m.max_positions() for m in models] + ), + ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=cfg.dataset.required_batch_size_multiple, + seed=cfg.common.seed, + num_shards=cfg.distributed_training.distributed_world_size, + shard_id=cfg.distributed_training.distributed_rank, + num_workers=cfg.dataset.num_workers, + data_buffer_size=cfg.dataset.data_buffer_size, + ).next_epoch_itr(shuffle=False) + progress = progress_bar.progress_bar( + itr, + log_format=cfg.common.log_format, + log_interval=cfg.common.log_interval, + default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"), + ) + + # Initialize generator + if cfg.generation.match_source_len: + logger.warning( + "The option match_source_len is not applicable to speech recognition. Ignoring it." + ) + gen_timer = StopwatchMeter() + extra_gen_cls_kwargs = { + "lm_model": lms[0], + "lm_weight": cfg.generation.lm_weight, + } + cfg.generation.score_reference = False # + save_attention_plot = cfg.generation.print_alignment is not None + cfg.generation.print_alignment = None # + generator = task.build_generator( + models, cfg.generation, extra_gen_cls_kwargs=extra_gen_cls_kwargs + ) + + def decode_fn(x): + symbols_ignore = get_symbols_to_strip_from_output(generator) + symbols_ignore.add(dictionary.pad()) + if hasattr(task.datasets[cfg.dataset.gen_subset].label_processors[0], 'decode'): + return task.datasets[cfg.dataset.gen_subset].label_processors[0].decode(x, symbols_ignore) + chars = dictionary.string(x, extra_symbols_to_ignore=symbols_ignore) + words = " ".join("".join(chars.split()).replace('|', ' ').split()) + return words + + num_sentences = 0 + has_target = True + wps_meter = TimeMeter() + result_dict = {'utt_id': [], 'ref': [], 'hypo': []} + for sample in progress: + sample = utils.move_to_cuda(sample) if use_cuda else sample + if "net_input" not in sample: + continue + + prefix_tokens = None + if cfg.generation.prefix_size > 0: + prefix_tokens = sample["target"][:, : cfg.generation.prefix_size] + + constraints = None + if "constraints" in sample: + constraints = sample["constraints"] + + gen_timer.start() + hypos = task.inference_step( + generator, + models, + sample, + prefix_tokens=prefix_tokens, + constraints=constraints, + ) + num_generated_tokens = sum(len(h[0]["tokens"]) for h in hypos) + gen_timer.stop(num_generated_tokens) + + for i in range(len(sample["id"])): + result_dict['utt_id'].append(sample['utt_id'][i]) + ref_sent = decode_fn(sample['target'][i].int().cpu()) + result_dict['ref'].append(ref_sent) + best_hypo = hypos[i][0]['tokens'].int().cpu() + hypo_str = decode_fn(best_hypo) + result_dict['hypo'].append(hypo_str) + logger.info(f"\nREF:{ref_sent}\nHYP:{hypo_str}\n") + wps_meter.update(num_generated_tokens) + progress.log({"wps": round(wps_meter.avg)}) + num_sentences += sample["nsentences"] if "nsentences" in sample else sample["id"].numel() + + logger.info("NOTE: hypothesis and token scores are output in base 2") + logger.info("Recognized {:,} utterances ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)".format( + num_sentences, gen_timer.n, gen_timer.sum, num_sentences / gen_timer.sum, 1. 
/ gen_timer.avg)) + + yaml_str = OmegaConf.to_yaml(cfg.generation) + fid = int(hashlib.md5(yaml_str.encode("utf-8")).hexdigest(), 16) + fid = fid % 1000000 + result_fn = f"{cfg.common_eval.results_path}/hypo-{fid}.json" + json.dump(result_dict, open(result_fn, 'w'), indent=4) + n_err, n_total = 0, 0 + assert len(result_dict['hypo']) == len(result_dict['ref']) + for hypo, ref in zip(result_dict['hypo'], result_dict['ref']): + hypo, ref = hypo.strip().split(), ref.strip().split() + n_err += editdistance.eval(hypo, ref) + n_total += len(ref) + wer = 100 * n_err / n_total + wer_fn = f"{cfg.common_eval.results_path}/wer.{fid}" + with open(wer_fn, "w") as fo: + fo.write(f"WER: {wer}\n") + fo.write(f"err / num_ref_words = {n_err} / {n_total}\n\n") + fo.write(f"{yaml_str}") + logger.info(f"WER: {wer}%") + return + + +@hydra.main(config_path=config_path, config_name="infer") +def hydra_main(cfg: InferConfig) -> Union[float, Tuple[float, Optional[float]]]: + container = OmegaConf.to_container(cfg, resolve=True, enum_to_str=True) + cfg = OmegaConf.create(container) + OmegaConf.set_struct(cfg, True) + + if cfg.common.reset_logging: + reset_logging() + + wer = float("inf") + + try: + if cfg.common.profile: + with torch.cuda.profiler.profile(): + with torch.autograd.profiler.emit_nvtx(): + distributed_utils.call_main(cfg, main) + else: + distributed_utils.call_main(cfg, main) + + except BaseException as e: # pylint: disable=broad-except + if not cfg.common.suppress_crashes: + raise + else: + logger.error("Crashed! %s", str(e)) + return + + +def cli_main() -> None: + try: + from hydra._internal.utils import ( + get_args, + ) # pylint: disable=import-outside-toplevel + + cfg_name = get_args().config_name or "infer" + except ImportError: + logger.warning("Failed to get config name from hydra args") + cfg_name = "infer" + + cs = ConfigStore.instance() + cs.store(name=cfg_name, node=InferConfig) + + for k in InferConfig.__dataclass_fields__: + if is_dataclass(InferConfig.__dataclass_fields__[k].type): + v = InferConfig.__dataclass_fields__[k].default + cs.store(name=k, node=v) + + hydra_main() # pylint: disable=no-value-for-parameter + + +if __name__ == "__main__": + cli_main() diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/models/decoder.py b/SpeechT5/VATLM/vat_hubert/vathubert/models/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..481842b2210229c2c41a279f8febdce65027fb62 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/models/decoder.py @@ -0,0 +1,246 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +from argparse import Namespace +import contextlib +import copy +import math +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from dataclasses import dataclass, field +from omegaconf import MISSING, II, open_dict +from typing import Any, Optional + +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from 
fairseq.tasks import FairseqTask +from fairseq.models import ( + BaseFairseqModel, + FairseqEncoder, + FairseqEncoderDecoderModel, + FairseqIncrementalDecoder, + register_model, +) +# from fairseq.models.wav2vec.wav2vec2 import MASKING_DISTRIBUTION_CHOICES +from fairseq.modules import ( + LayerNorm, + PositionalEmbedding, + TransformerDecoderLayer, +) + + +class TransformerDecoder(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, + cfg, + dictionary, + embed_tokens, + no_encoder_attn=False, + ): + super().__init__(dictionary) + + self.dropout = cfg.decoder_dropout + self.share_input_output_embed = cfg.share_decoder_input_output_embed + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = cfg.decoder_embed_dim + self.output_embed_dim = cfg.decoder_embed_dim + + self.layerdrop = cfg.decoder_layerdrop + + padding_idx = embed_tokens.padding_idx + self.max_target_positions = cfg.max_target_positions + + self.embed_tokens = embed_tokens + # self.embed_scale = math.sqrt(embed_dim) # todo: try with input_embed_dim + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + + self.embed_positions = ( + PositionalEmbedding( + cfg.max_target_positions, + embed_dim, + padding_idx, + learned=cfg.decoder_learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + + # TODO: update this when transformer gets converted to dataclass configs + transformer_cfg = copy.deepcopy(cfg) + # with open_dict(transformer_cfg): + transformer_cfg.dropout = transformer_cfg.decoder_dropout + transformer_cfg.attention_dropout = ( + transformer_cfg.decoder_attention_dropout + ) + transformer_cfg.activation_dropout = ( + transformer_cfg.decoder_activation_dropout + ) + + self.layers = nn.ModuleList([]) + self.layers.extend( + [ + TransformerDecoderLayer(transformer_cfg, no_encoder_attn) + for _ in range(transformer_cfg.decoder_layers) + ] + ) + + if not self.share_input_output_embed: + self.embed_out = nn.Parameter( + torch.Tensor(len(dictionary), self.output_embed_dim) + ) + nn.init.normal_(self.embed_out, mean=0, std=self.output_embed_dim ** -0.5) + + if transformer_cfg.decoder_normalize_before: + self.layer_norm = LayerNorm(embed_dim) + else: + self.layer_norm = None + + def forward( + self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (Tensor, optional): output from the encoder, used for + encoder-side attention + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + prev_output_tokens = prev_output_tokens.long() + x, extra = self.extract_features( + prev_output_tokens, encoder_out, incremental_state + ) + x = self.output_layer(x) + return x, extra + + def extract_features( + self, prev_output_tokens, 
encoder_out=None, incremental_state=None, **unused + ): + """ + Similar to *forward* but only return features. + + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + + # embed positions + positions = ( + self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + if self.embed_positions is not None + else None + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + attn = None + + inner_states = [x] + + # decoder layers + for layer in self.layers: + dropout_probability = np.random.random() + if not self.training or (dropout_probability > self.layerdrop): + x, attn, _ = layer( + x, + encoder_out["encoder_out"] if encoder_out is not None else None, + encoder_out["padding_mask"] if encoder_out is not None else None, + incremental_state, + self_attn_mask=self.buffered_future_mask(x) + if incremental_state is None + else None, + ) + inner_states.append(x) + + if self.layer_norm: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + return x, {"attn": attn, "inner_states": inner_states} + + def output_layer(self, features, **kwargs): + """Project features to the vocabulary size.""" + # project back to size of vocabulary + emb_mat = self.embed_tokens.weight if self.share_input_output_embed else self.embed_out + return torch.matmul(features, emb_mat.transpose(0, 1)) + # if self.share_input_output_embed: + # return F.linear(features, self.embed_tokens.weight) + # else: + # return F.linear(features, self.embed_out) + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embed_positions is None: + return self.max_target_positions + return min(self.max_target_positions, self.embed_positions.max_positions) + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + if ( + not hasattr(self, "_future_mask") + or self._future_mask is None + or self._future_mask.device != tensor.device + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(tensor.new(dim, dim)), 1 + ) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/models/resnet.py b/SpeechT5/VATLM/vat_hubert/vathubert/models/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..4e9436f531713a2f1cb26b38e148e0e66d3f3877 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/models/resnet.py @@ -0,0 +1,172 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + 
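"""Visual front-end for VATLM / AV-HuBERT style lip-reading input.

A 3D convolutional stem (Conv3d 5x7x7 with temporal stride 1 and spatial stride 2,
BatchNorm3d, PReLU/ReLU, MaxPool3d) processes the grayscale mouth-ROI clip, then each
frame is passed through a 2D ResNet-18 trunk (BasicBlock x [2, 2, 2, 2]) with global
average pooling, yielding one 512-dim feature vector per video frame.

Editorial shape sketch (illustrative only; the random tensor below is a stand-in for a
real 88x88 mouth-ROI clip and is not part of this module):

    >>> import torch
    >>> enc = ResEncoder(relu_type='prelu', weights=None)
    >>> clip = torch.randn(2, 1, 50, 88, 88)      # (B, C=1, T, H, W)
    >>> enc(clip).shape                           # torch.Size([2, 512, 50]) == (B, 512, T)

Note: the pretrained-weights path in ResEncoder uses torch.load and
collections.OrderedDict, which do not appear to be imported in this module as written.
"""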
+import logging +import math +import torch.nn as nn +import pdb + + +logger = logging.getLogger(__name__) + +def conv3x3(in_planes, out_planes, stride=1): + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +def downsample_basic_block( inplanes, outplanes, stride ): + return nn.Sequential( + nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(outplanes), + ) + +def downsample_basic_block_v2( inplanes, outplanes, stride ): + return nn.Sequential( + nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False), + nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(outplanes), + ) + + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, relu_type = 'relu' ): + super(BasicBlock, self).__init__() + + assert relu_type in ['relu','prelu'] + + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + + if relu_type == 'relu': + self.relu1 = nn.ReLU(inplace=True) + self.relu2 = nn.ReLU(inplace=True) + elif relu_type == 'prelu': + self.relu1 = nn.PReLU(num_parameters=planes) + self.relu2 = nn.PReLU(num_parameters=planes) + else: + raise Exception('relu type not implemented') + + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu1(out) + out = self.conv2(out) + out = self.bn2(out) + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu2(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, relu_type = 'relu', gamma_zero = False, avg_pool_downsample = False): + self.inplanes = 64 + self.relu_type = relu_type + self.gamma_zero = gamma_zero + self.downsample_block = downsample_basic_block_v2 if avg_pool_downsample else downsample_basic_block + + super(ResNet, self).__init__() + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AdaptiveAvgPool2d(1) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + if self.gamma_zero: + for m in self.modules(): + if isinstance(m, BasicBlock ): + m.bn2.weight.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + + + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = self.downsample_block( inplanes = self.inplanes, + outplanes = planes * block.expansion, + stride = stride ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, relu_type = self.relu_type)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, relu_type = self.relu_type)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + return x + +class ResEncoder(nn.Module): + def __init__(self, relu_type, weights): + super(ResEncoder, self).__init__() + self.frontend_nout = 64 + self.backend_out = 512 + frontend_relu = nn.PReLU(num_parameters=self.frontend_nout) if relu_type == 'prelu' else nn.ReLU() + self.frontend3D = nn.Sequential( + nn.Conv3d(1, self.frontend_nout, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False), + nn.BatchNorm3d(self.frontend_nout), + frontend_relu, + nn.MaxPool3d( kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1))) + self.trunk = ResNet(BasicBlock, [2, 2, 2, 2], relu_type=relu_type) + if weights is not None: + logger.info(f"Load {weights} for resnet") + std = torch.load(weights, map_location=torch.device('cpu'))['model_state_dict'] + frontend_std, trunk_std = OrderedDict(), OrderedDict() + for key, val in std.items(): + new_key = '.'.join(key.split('.')[1:]) + if 'frontend3D' in key: + frontend_std[new_key] = val + if 'trunk' in key: + trunk_std[new_key] = val + self.frontend3D.load_state_dict(frontend_std) + self.trunk.load_state_dict(trunk_std) + + def forward(self, x): + B, C, T, H, W = x.size() + x = self.frontend3D(x) + Tnew = x.shape[2] + x = self.threeD_to_2D_tensor(x) + x = self.trunk(x) + x = x.view(B, Tnew, x.size(1)) + x = x.transpose(1, 2).contiguous() + return x + + def threeD_to_2D_tensor(self, x): + n_batch, n_channels, s_time, sx, sy = x.shape + x = x.transpose(1, 2).contiguous() + return x.reshape(n_batch*s_time, n_channels, sx, sy) diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/models/utils.py b/SpeechT5/VATLM/vat_hubert/vathubert/models/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b04a5e67f99e20d1f26a8d9377a8de85188aa425 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/models/utils.py @@ -0,0 +1,301 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import cv2 +import torch +import random +import numpy as np +from typing import Dict, List, Optional, Tuple + +def load_video(path): + for i in range(3): + try: + cap = cv2.VideoCapture(path) + frames = [] + while True: + ret, frame = 
cap.read() + if ret: + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + frames.append(frame) + else: + break + frames = np.stack(frames) + return frames + except Exception: + print(f"failed loading {path} ({i} / 3)") + if i == 2: + raise ValueError(f"Unable to load {path}") + + +class Compose(object): + """Compose several preprocess together. + Args: + preprocess (list of ``Preprocess`` objects): list of preprocess to compose. + """ + + def __init__(self, preprocess): + self.preprocess = preprocess + + def __call__(self, sample): + for t in self.preprocess: + sample = t(sample) + return sample + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.preprocess: + format_string += '\n' + format_string += ' {0}'.format(t) + format_string += '\n)' + return format_string + + +class Normalize(object): + """Normalize a ndarray image with mean and standard deviation. + """ + + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, frames): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + Returns: + Tensor: Normalized Tensor image. + """ + frames = (frames - self.mean) / self.std + return frames + + def __repr__(self): + return self.__class__.__name__+'(mean={0}, std={1})'.format(self.mean, self.std) + +class CenterCrop(object): + """Crop the given image at the center + """ + def __init__(self, size): + self.size = size + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be cropped. + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + th, tw = self.size + delta_w = int(round((w - tw))/2.) + delta_h = int(round((h - th))/2.) + frames = frames[:, delta_h:delta_h+th, delta_w:delta_w+tw] + return frames + + +class RandomCrop(object): + """Crop the given image at the center + """ + + def __init__(self, size): + self.size = size + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be cropped. + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + th, tw = self.size + delta_w = random.randint(0, w-tw) + delta_h = random.randint(0, h-th) + frames = frames[:, delta_h:delta_h+th, delta_w:delta_w+tw] + return frames + + def __repr__(self): + return self.__class__.__name__ + '(size={0})'.format(self.size) + +class HorizontalFlip(object): + """Flip image horizontally. + """ + + def __init__(self, flip_ratio): + self.flip_ratio = flip_ratio + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be flipped with a probability flip_ratio + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + if random.random() < self.flip_ratio: + for index in range(t): + frames[index] = cv2.flip(frames[index], 1) + return frames + +def compute_mask_indices( + shape: Tuple[int, int], + padding_mask: Optional[torch.Tensor], + mask_prob: float, + mask_length: int, + mask_type: str = "static", + mask_other: float = 0.0, + min_masks: int = 0, + no_overlap: bool = False, + min_space: int = 0, +) -> np.ndarray: + """ + Computes random mask spans for a given shape + Args: + shape: the the shape for which to compute masks. + should be of size 2 where first element is batch size and 2nd is timesteps + padding_mask: optional padding mask of the same size as shape, which will prevent masking padded elements + mask_prob: probability for each token to be chosen as start of the span to be masked. 
this will be multiplied by + number of timesteps divided by length of mask span to mask approximately this percentage of all elements. + however due to overlaps, the actual number will be smaller (unless no_overlap is True) + mask_type: how to compute mask lengths + static = fixed size + uniform = sample from uniform distribution [mask_other, mask_length*2] + normal = sample from normal distribution with mean mask_length and stdev mask_other. mask is min 1 element + poisson = sample from possion distribution with lambda = mask length + min_masks: minimum number of masked spans + no_overlap: if false, will switch to an alternative recursive algorithm that prevents spans from overlapping + min_space: only used if no_overlap is True, this is how many elements to keep unmasked between spans + """ + + bsz, all_sz = shape + mask = np.full((bsz, all_sz), False) + + all_num_mask = int( + # add a random number for probabilistic rounding + mask_prob * all_sz / float(mask_length) + + np.random.rand() + ) + + all_num_mask = max(min_masks, all_num_mask) + + mask_idcs = [] + for i in range(bsz): + if padding_mask is not None: + sz = all_sz - padding_mask[i].long().sum().item() + num_mask = int( + # add a random number for probabilistic rounding + mask_prob * sz / float(mask_length) + + np.random.rand() + ) + num_mask = max(min_masks, num_mask) + else: + sz = all_sz + num_mask = all_num_mask + + if mask_type == "static": + lengths = np.full(num_mask, mask_length) + elif mask_type == "uniform": + lengths = np.random.randint(mask_other, mask_length * 2 + 1, size=num_mask) + elif mask_type == "normal": + lengths = np.random.normal(mask_length, mask_other, size=num_mask) + lengths = [max(1, int(round(x))) for x in lengths] + elif mask_type == "poisson": + lengths = np.random.poisson(mask_length, size=num_mask) + lengths = [int(round(x)) for x in lengths] + else: + raise Exception("unknown mask selection " + mask_type) + + if sum(lengths) == 0: + lengths[0] = min(mask_length, sz - 1) + + if no_overlap: + mask_idc = [] + + def arrange(s, e, length, keep_length): + span_start = np.random.randint(s, e - length) + mask_idc.extend(span_start + i for i in range(length)) + + new_parts = [] + if span_start - s - min_space >= keep_length: + new_parts.append((s, span_start - min_space + 1)) + if e - span_start - keep_length - min_space > keep_length: + new_parts.append((span_start + length + min_space, e)) + return new_parts + + parts = [(0, sz)] + min_length = min(lengths) + for length in sorted(lengths, reverse=True): + lens = np.fromiter( + (e - s if e - s >= length + min_space else 0 for s, e in parts), + np.int, + ) + l_sum = np.sum(lens) + if l_sum == 0: + break + probs = lens / np.sum(lens) + c = np.random.choice(len(parts), p=probs) + s, e = parts.pop(c) + parts.extend(arrange(s, e, length, min_length)) + mask_idc = np.asarray(mask_idc) + else: + min_len = min(lengths) + if sz - min_len <= num_mask: + min_len = sz - num_mask - 1 + + mask_idc = np.random.choice(sz - min_len, num_mask, replace=False) + + mask_idc = np.asarray( + [ + mask_idc[j] + offset + for j in range(len(mask_idc)) + for offset in range(lengths[j]) + ] + ) + + mask_idcs.append(np.unique(mask_idc[mask_idc < sz])) + + min_len = min([len(m) for m in mask_idcs]) + batch_indexes, starts, ends = [], [], [] + for i, mask_idc in enumerate(mask_idcs): + if len(mask_idc) > min_len: + mask_idc = np.random.choice(mask_idc, min_len, replace=False) + mask[i, mask_idc] = True + vals, run_starts, run_lengths = find_runs(mask[i]) + start_indices, lengths = 
run_starts[vals == True], run_lengths[vals == True] + starts.append(start_indices) + ends.append(start_indices+lengths) + batch_indexes.append(np.zeros([len(start_indices)])+i) + return mask, np.concatenate(starts).astype(np.int64), np.concatenate(ends).astype(np.int64), np.concatenate(batch_indexes).astype(np.int64) + +def find_runs(x): + """Find runs of consecutive items in an array.""" + + # ensure array + x = np.asanyarray(x) + if x.ndim != 1: + raise ValueError('only 1D array supported') + n = x.shape[0] + + # handle empty array + if n == 0: + return np.array([]), np.array([]), np.array([]) + + else: + # find run starts + loc_run_start = np.empty(n, dtype=bool) + loc_run_start[0] = True + np.not_equal(x[:-1], x[1:], out=loc_run_start[1:]) + run_starts = np.nonzero(loc_run_start)[0] + + # find run values + run_values = x[loc_run_start] + + # find run lengths + run_lengths = np.diff(np.append(run_starts, n)) + + return run_values, run_starts, run_lengths diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/models/vathubert.py b/SpeechT5/VATLM/vat_hubert/vathubert/models/vathubert.py new file mode 100644 index 0000000000000000000000000000000000000000..a172b4a87d511e1b8bc02a5f6f3ffe08807cb973 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/models/vathubert.py @@ -0,0 +1,851 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import os,sys +import logging +from typing import Dict, List, Optional, Tuple + +import numpy as np + +import torch +import torch.nn as nn +from dataclasses import dataclass, field +from fairseq import utils +from fairseq.data.data_utils import compute_mask_indices +from fairseq.data.dictionary import Dictionary +from fairseq.dataclass import ChoiceEnum, FairseqDataclass +from fairseq.models import BaseFairseqModel, register_model +from fairseq.models.wav2vec.wav2vec2 import ( + ConvFeatureExtractionModel, + TransformerEncoder, +) +from fairseq.modules import GradMultiply, LayerNorm +from copy import deepcopy + +DBG=True if len(sys.argv) == 1 else False + +if DBG: + from vathubert.tasks.vathubert_pretraining import ( + VATHubertPretrainingConfig, + VATHubertPretrainingTask, + ) + from resnet import ResEncoder + logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, + ) + from utils import compute_mask_indices + from decoder import TransformerDecoder + +else: + from vathubert.tasks.vathubert_pretraining import ( + VATHubertPretrainingConfig, + VATHubertPretrainingTask, + ) + from vathubert.models.resnet import ResEncoder + from vathubert.models.utils import compute_mask_indices + from vathubert.models.decoder import TransformerDecoder + +from omegaconf import II + +logger = logging.getLogger(__name__) + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum( + ["static", "uniform", "normal", "poisson"] +) + + +@dataclass +class VATHubertConfig(FairseqDataclass): + 
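    """Model configuration for VAT-HuBERT (VATLM) pre-training.

    Largely mirrors the AV-HuBERT-style config: label_rate and modalities are
    interpolated from the task config via II(...), and span masking is configured
    separately for the audio and image streams (mask_prob_audio/mask_prob_image,
    mask_length_audio/mask_length_image). Additional fields such as modality_fuse,
    modality_dropout and audio_dropout control how the audio, visual and phone (text)
    features are combined and regularized.
    """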
label_rate: int = II("task.label_rate") + modalities: str = II("task.modalities") + extractor_mode: EXTRACTOR_MODE_CHOICES = field( + default="default", + metadata={ + "help": "mode for feature extractor. default has a single group " + "norm with d groups in the first conv block, whereas layer_norm " + "has layer norms in every block (meant to use with normalize=True)" + }, + ) + encoder_layers: int = field( + default=12, metadata={"help": "num encoder layers in the transformer"} + ) + encoder_embed_dim: int = field( + default=768, metadata={"help": "encoder embedding dimension"} + ) + encoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "encoder embedding dimension for FFN"} + ) + encoder_attention_heads: int = field( + default=12, metadata={"help": "num encoder attention heads"} + ) + activation_fn: ChoiceEnum(utils.get_available_activation_fns()) = field( + default="gelu", metadata={"help": "activation function to use"} + ) + + # dropouts + dropout: float = field( + default=0.1, + metadata={"help": "dropout probability for the transformer"}, + ) + attention_dropout: float = field( + default=0.1, + metadata={"help": "dropout probability for attention weights"}, + ) + activation_dropout: float = field( + default=0.0, + metadata={"help": "dropout probability after activation in FFN"}, + ) + encoder_layerdrop: float = field( + default=0.0, + metadata={"help": "probability of dropping a tarnsformer layer"}, + ) + dropout_input: float = field( + default=0.0, + metadata={"help": "dropout to apply to the input (after feat extr)"}, + ) + dropout_features: float = field( + default=0.0, + metadata={ + "help": "dropout to apply to the features (after feat extr)" + }, + ) + + final_dim: int = field( + default=0, + metadata={ + "help": "project final representations and targets to this many " + "dimensions. 
set to encoder_embed_dim is <= 0" + }, + ) + untie_final_proj: bool = field( + default=False, + metadata={"help": "use separate projection for each target"}, + ) + layer_norm_first: bool = field( + default=False, + metadata={"help": "apply layernorm first in the transformer"}, + ) + conv_feature_layers: str = field( + default="[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2", + metadata={ + "help": "string describing convolutional feature extraction " + "layers in form of a python list that contains " + "[(dim, kernel_size, stride), ...]" + }, + ) + conv_bias: bool = field( + default=False, metadata={"help": "include bias in conv encoder"} + ) + logit_temp: float = field( + default=0.1, metadata={"help": "temperature to divide logits by"} + ) + target_glu: bool = field( + default=False, metadata={"help": "adds projection + glu to targets"} + ) + feature_grad_mult: float = field( + default=1.0, + metadata={"help": "multiply feature extractor var grads by this"}, + ) + + # masking + mask_length_audio: int = field(default=10, metadata={"help": "mask length"}) + mask_prob_audio: float = field( + default=0.65, + metadata={"help": "probability of replacing a token with mask"}, + ) + mask_length_image: int = field(default=10, metadata={"help": "mask length"}) + mask_prob_image: float = field( + default=0.65, + metadata={"help": "probability of replacing a token with mask"}, + ) + mask_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", metadata={"help": "how to choose mask length"} + ) + mask_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indicesh" + }, + ) + no_mask_overlap: bool = field( + default=False, metadata={"help": "whether to allow masks to overlap"} + ) + mask_min_space: int = field( + default=1, + metadata={ + "help": "min space between spans (if no overlap is enabled)" + }, + ) + + # channel masking + mask_channel_length: int = field( + default=10, + metadata={"help": "length of the mask for features (channels)"}, + ) + mask_channel_prob: float = field( + default=0.0, + metadata={"help": "probability of replacing a feature with 0"}, + ) + mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", + metadata={"help": "how to choose mask length for channel masking"}, + ) + mask_channel_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indicesh" + }, + ) + no_mask_channel_overlap: bool = field( + default=False, + metadata={"help": "whether to allow channel masks to overlap"}, + ) + mask_channel_min_space: int = field( + default=1, + metadata={ + "help": "min space between spans (if no overlap is enabled)" + }, + ) + + # positional embeddings + conv_pos: int = field( + default=128, + metadata={ + "help": "number of filters for convolutional positional embeddings" + }, + ) + conv_pos_groups: int = field( + default=16, + metadata={ + "help": "number of groups for convolutional positional embedding" + }, + ) + + latent_temp: Tuple[float, float, float] = field( + default=(2, 0.5, 0.999995), + metadata={"help": "legacy (to be removed)"}, + ) + + # loss computation + skip_masked: bool = field( + default=False, + metadata={"help": "skip computing losses over masked frames"}, + ) + skip_nomask: bool = field( + default=False, + metadata={"help": "skip computing losses over unmasked frames"}, + ) + resnet_relu_type: str = field(default='prelu', 
metadata={"help": 'relu type for resnet'}) + resnet_weights: Optional[str] = field(default=None, metadata={"help": 'resnet weights'}) + sim_type: str = field(default='cosine', metadata={"help": 'similarity type'}) + + sub_encoder_layers: int = field(default=0, metadata={'help': 'number of transformer layers for single modality'}) + audio_feat_dim: int = field(default=-1, metadata={'help': 'audio feature dimension'}) + modality_dropout: float = field(default=0, metadata={'help': 'drop one modality'}) + audio_dropout: float = field(default=0, metadata={'help': 'drop audio feature'}) + modality_fuse: str = field(default='concat', metadata={'help': 'fusing two modalities: add,concat'}) + selection_type : str = field(default='same_other_seq', metadata={'help': 'type of selectig images, same_other_seq: replace masked span with span from another sequence, same_seq: repace masked span with span of the same sequence'}) + masking_type : str = field(default='input', metadata={'help': 'input or feature masking'}) + + decoder_embed_dim: int = field( + default=768, metadata={"help": "decoder embedding dimension"} + ) + decoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "decoder embedding dimension for FFN"} + ) + decoder_layers: int = field( + default=6, metadata={"help": "num of decoder layers"} + ) + decoder_layerdrop: float = field( + default=0.0, metadata={"help": "decoder layerdrop chance"} + ) + decoder_attention_heads: int = field( + default=4, metadata={"help": "num decoder attention heads"} + ) + decoder_learned_pos: bool = field( + default=False, + metadata={"help": "use learned positional embeddings in the decoder"}, + ) + decoder_normalize_before: bool = field( + default=False, + metadata={"help": "apply layernorm before each decoder block"}, + ) + no_token_positional_embeddings: bool = field( + default=False, + metadata={ + "help": "if set, disables positional embeddings " + "(outside self attention)" + }, + ) + decoder_dropout: float = field( + default=0.1, metadata={"help": "dropout probability in the decoder"} + ) + decoder_attention_dropout: float = field( + default=0.1, + metadata={ + "help": "dropout probability for attention weights " + "inside the decoder" + }, + ) + decoder_activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN " + "inside the decoder" + }, + ) + max_target_positions: int = field( + default=2048, metadata={"help": "max target positions"} + ) + share_decoder_input_output_embed: bool = field( + default=False, + metadata={"help": "share decoder input and output embeddings"}, + ) + no_scale_embedding: bool = field(default=True, metadata={'help': 'scale embedding'}) + +class SubModel(nn.Module): + def __init__(self, resnet=None, input_dim=None, cfg=None): + super().__init__() + self.resnet = resnet + self.proj = nn.Linear(input_dim, cfg.encoder_embed_dim) + self.encoder = TransformerEncoder(cfg) if cfg.encoder_layers > 0 else None + + def forward(self, x): + if self.resnet is not None: + x = self.resnet(x) + x = self.proj(x.transpose(1, 2)) + if self.encoder is not None: + x = self.encoder(x)[0].transpose(1, 2) + else: + x = x.transpose(1, 2) + return x + +@register_model("vat_hubert", dataclass=VATHubertConfig) +class VATHubertModel(BaseFairseqModel): + def __init__( + self, + cfg: VATHubertConfig, + task_cfg: VATHubertPretrainingConfig, + dictionaries: List[Dictionary], + **kwargs + ) -> None: + super().__init__() + logger.info(f"HubertModel Config: {cfg}") + + feature_ds_rate = 1 + 
self.feat2tar_ratio = cfg.label_rate * feature_ds_rate / task_cfg.sample_rate + sub_cfg = deepcopy(cfg) + sub_cfg.encoder_layers = sub_cfg.sub_encoder_layers + resnet = ResEncoder(relu_type=cfg.resnet_relu_type, weights=cfg.resnet_weights) + self.feature_extractor_audio = SubModel(resnet=None, input_dim=cfg.audio_feat_dim, cfg=sub_cfg) + self.feature_extractor_video = SubModel(resnet=resnet, input_dim=resnet.backend_out, cfg=sub_cfg) + self.modality_dropout, self.audio_dropout = cfg.modality_dropout, cfg.audio_dropout + self.modality_fuse = cfg.modality_fuse + self.encoder_embed_dim = cfg.encoder_embed_dim + if self.modality_fuse == 'concat': + self.embed = cfg.encoder_embed_dim * 3 + elif self.modality_fuse == 'add': + self.embed = cfg.encoder_embed_dim + self.post_extract_proj = ( + nn.Linear(self.embed, cfg.encoder_embed_dim) + if self.embed != cfg.encoder_embed_dim + else None + ) + + self.mask_prob_image, self.mask_prob_audio = cfg.mask_prob_image, cfg.mask_prob_audio + self.mask_selection = cfg.mask_selection + self.mask_other = cfg.mask_other + self.mask_length_image, self.mask_length_audio = cfg.mask_length_image, cfg.mask_length_audio + self.no_mask_overlap = cfg.no_mask_overlap + self.mask_min_space = cfg.mask_min_space + + self.mask_channel_prob = cfg.mask_channel_prob + self.mask_channel_selection = cfg.mask_channel_selection + self.mask_channel_other = cfg.mask_channel_other + self.mask_channel_length = cfg.mask_channel_length + self.no_mask_channel_overlap = cfg.no_mask_channel_overlap + self.mask_channel_min_space = cfg.mask_channel_min_space + + self.dropout_input = nn.Dropout(cfg.dropout_input) + self.dropout_features = nn.Dropout(cfg.dropout_features) + + self.feature_grad_mult = cfg.feature_grad_mult + self.logit_temp = cfg.logit_temp + self.skip_masked = cfg.skip_masked + self.skip_nomask = cfg.skip_nomask + self.sim_type = cfg.sim_type + self.selection_type = cfg.selection_type + self.masking_type = cfg.masking_type + + final_dim = ( + cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim + ) + + self.mask_emb = nn.Parameter( + torch.FloatTensor(cfg.audio_feat_dim).uniform_() if self.masking_type == 'input' else torch.FloatTensor(cfg.encoder_embed_dim).uniform_() + ) + + self.encoder = TransformerEncoder(cfg) + self.layer_norm = LayerNorm(self.embed) + + self.target_glu = None + if cfg.target_glu: + self.target_glu = nn.Sequential( + nn.Linear(final_dim, final_dim * 2), nn.GLU() + ) + + self.untie_final_proj = cfg.untie_final_proj + if self.untie_final_proj: + self.final_proj = nn.Linear( + cfg.encoder_embed_dim, final_dim * len(dictionaries) + ) + else: + self.final_proj = nn.Linear(cfg.encoder_embed_dim, final_dim) + + # modules below are not needed during fine-tuning + if any([d is None for d in dictionaries]): + logger.info( + "cannot find dictionary. 
assume will be used for fine-tuning" + ) + else: + self.num_classes = [len(d) for d in dictionaries] + self.label_embs_concat = nn.Parameter( + torch.FloatTensor(sum(self.num_classes), final_dim) + ) + nn.init.uniform_(self.label_embs_concat) + + self.phone_embed = nn.Embedding(46, cfg.encoder_embed_dim) + self.phone_conv = nn.Sequential( + nn.Conv1d(in_channels=cfg.encoder_embed_dim, out_channels=cfg.encoder_embed_dim, kernel_size=3, stride=2, padding=1), + nn.ReLU(), + nn.Conv1d(in_channels=cfg.encoder_embed_dim, out_channels=cfg.encoder_embed_dim, kernel_size=3, stride=2, padding=1), + ) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: VATHubertConfig, task: VATHubertPretrainingTask): + """Build a new model instance.""" + + kwargs = {} + model = VATHubertModel(cfg, task.cfg, task.dictionaries, **kwargs) + return model + + def apply_input_mask(self, x, padding_mask, target_list): + B, C, T = x.shape[:3] + is_audio = True if len(x.shape) == 3 else False + if is_audio: + mask_prob, mask_length = self.mask_prob_audio, self.mask_length_audio + else: + mask_prob, mask_length = self.mask_prob_image, self.mask_length_image + if mask_prob > 0: + + mask_indices, starts, ends, batch_indexes = compute_mask_indices( + (B, T), + padding_mask, + mask_prob, + mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + mask_indices_np = mask_indices + mask_indices = torch.from_numpy(mask_indices).to(x.device) + x = x.transpose(1, 2).contiguous() # [B, T, C, H, W] + if B == 1: + x[mask_indices] = 0 + elif is_audio: + x[mask_indices] = self.mask_emb + elif self.selection_type == 'same_other_seq': + perm = (torch.arange(B) + torch.randint(low=1, high=B, size=(1,))) % B + x_perm = x[perm] + x[mask_indices] = x_perm[mask_indices] + elif self.selection_type == 'same_seq': + batch_indexes_, other_indexes = [], [] + for batch_index, start, end in zip(batch_indexes, starts, ends): + length = end-start + other_start = np.setdiff1d(np.arange(T), np.arange(max(0, start-length), end)) + if len(other_start) > 0: + other_start = np.random.choice(other_start, size=1) + else: + other_start = 0 + other_end = other_start + length + other_indexes.append(np.arange(other_start, other_end).clip(max=T-1)) + batch_indexes_.append(np.zeros([length], dtype=np.int64)+batch_index) + batch_indexes, other_indexes = np.concatenate(batch_indexes_), np.concatenate(other_indexes) + x[mask_indices] = x[batch_indexes, other_indexes] + + x = x.transpose(1, 2).contiguous() + else: + mask_indices = None + + if self.mask_channel_prob > 0: + logger.info(f"No mask channel prob for input masking") + return x, mask_indices + + def apply_feature_mask(self, x, padding_mask, target_list): + B, T, C = x.shape + assert self.mask_prob_audio == self.mask_prob_image and self.mask_length_audio == self.mask_length_image, f"masking prob/length for image/audio be same for feature masking" + mask_prob, mask_length = self.mask_prob_audio, self.mask_length_image + if mask_prob > 0: + mask_indices, _, _, _ = compute_mask_indices( + (B, T), + padding_mask, + mask_prob, + mask_length, + self.mask_selection, + self.mask_other, + min_masks=2, + no_overlap=self.no_mask_overlap, + min_space=self.mask_min_space, + ) + mask_indices = torch.from_numpy(mask_indices).to(x.device) + 
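            # Feature-level masking: masked time steps are overwritten with the learned
            # mask embedding (cf. apply_input_mask above, which instead operates on the
            # raw inputs before the feature extractors, substituting other video spans
            # for masked video frames and the mask embedding for masked audio frames).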
x[mask_indices] = self.mask_emb + else: + mask_indices = None + + if self.mask_channel_prob > 0: + mask_channel_indices, _, _, _ = compute_mask_indices( + (B, C), + None, + self.mask_channel_prob, + self.mask_channel_length, + self.mask_channel_selection, + self.mask_channel_other, + no_overlap=self.no_mask_channel_overlap, + min_space=self.mask_channel_min_space, + ) + mask_channel_indices = ( + torch.from_numpy(mask_channel_indices) + .to(x.device) + .unsqueeze(1) + .expand(-1, T, -1) + ) + x[mask_channel_indices] = 0 + + return x, mask_indices + + def forward_features(self, source: torch.Tensor, modality: str) -> torch.Tensor: + extractor = eval(f"self.feature_extractor_{modality}") + if self.feature_grad_mult > 0: + features = extractor(source) + if self.feature_grad_mult != 1.0: + features = GradMultiply.apply(features, self.feature_grad_mult) + else: + with torch.no_grad(): + features = extractor(source) + return features + + def forward_targets( + self, features: torch.Tensor, mask_indices: torch.Tensor, target_list: List[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Trim features to ensure labels exist and then get aligned labels + feat_tsz = features.size(2) + targ_tsz = min([t.size(1) for t in target_list]) + if self.feat2tar_ratio * feat_tsz > targ_tsz: + feat_tsz = int(targ_tsz / self.feat2tar_ratio) + features = features[..., :feat_tsz] + if mask_indices is not None: + mask_indices = mask_indices[..., :feat_tsz] + target_inds = torch.arange(feat_tsz).float() * self.feat2tar_ratio + target_list = [t[:, target_inds.long()] for t in target_list] + return features, mask_indices, target_list + + def forward_padding_mask( + self, features: torch.Tensor, padding_mask: torch.Tensor, + ) -> torch.Tensor: + extra = padding_mask.size(1) % features.size(1) + if extra > 0: + padding_mask = padding_mask[:, :-extra] + padding_mask = padding_mask.view( + padding_mask.size(0), features.size(1), -1 + ) + padding_mask = padding_mask.all(-1) + return padding_mask + + def compute_logits(self, feats, emb_mat): + # feats: [B, T, F], emb_mat: [V, F] + if self.sim_type == 'dot': + logits = torch.matmul(feats, emb_mat.transpose(0, 1)) + elif self.sim_type == 'cosine': + batch_size, timesteps, emb_dim = feats.size() + feats_ = feats.view(-1, emb_dim) + nom = (feats_.unsqueeze(dim=1) * emb_mat.unsqueeze(dim=0)).sum(dim=-1) # [B*T, V] + denom = (feats_**2).sum(dim=-1).sqrt().unsqueeze(dim=1) * (emb_mat**2).sum(dim=-1).sqrt().unsqueeze(dim=0) # [B*T, V] + logits = (nom/denom.clamp(min=1e-6)).view(batch_size, timesteps, -1) + else: + raise NotImplementedError + logits = logits / self.logit_temp + return logits + + def forward( + self, + source: torch.Tensor, + target_list: Optional[List[torch.Tensor]] = None, + targets_phone_list: Optional[List[torch.Tensor]] = None, + extra_text_phone_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None + ) -> Dict[str, torch.Tensor]: + """output layer is 1-based""" + + if not extra_text_phone_list: + src_audio, src_video = source['audio'], source['video'] # src_audio:[B, D1, T], [B, 1, T, 88, 88] + + if mask and self.masking_type == 'input': + src_video, mask_indices_video = self.apply_input_mask(src_video, padding_mask, target_list) + src_audio, mask_indices_audio = self.apply_input_mask(src_audio, padding_mask, target_list) + mask_indices = torch.logical_or(mask_indices_audio, mask_indices_video) + else: + src_audio, src_video, 
mask_indices = src_audio, src_video, None + + + if src_audio is not None and src_video is None: + features_audio = self.forward_features(src_audio, modality='audio') # features: [B, F, T] + features_video = features_audio.new_zeros(features_audio.size(0), features_audio.size(1), features_audio.size(-1)) + elif src_audio is None and src_video is not None: + features_video = self.forward_features(src_video, modality='video') + features_audio = features_video.new_zeros(features_video.size(0), features_video.size(1), features_video.size(-1)) + elif src_audio is not None and src_video is not None: + features_video = self.forward_features(src_video, modality='video') + features_audio = self.forward_features(src_audio, modality='audio') # features: [B, F, T] + + + if targets_phone_list is not None: + phone_sequence = targets_phone_list[0] + phone_embedding = self.phone_embed(phone_sequence) + + feature_phone = self.phone_conv(phone_embedding.transpose(1,2)) + + if targets_phone_list is None and src_audio is not None: + feature_phone = features_audio.new_zeros(features_audio.size(0), features_audio.size(1), features_audio.size(-1)) + + if targets_phone_list is None and src_video is not None: + feature_phone = features_video.new_zeros(features_video.size(0), features_video.size(1), features_video.size(-1)) + + + + if features_audio.size(-1) != feature_phone.size(-1): + diff = features_audio.size(-1) - feature_phone.size(-1) + + if diff >=0: + phone_pad_zero = torch.zeros(features_audio.size(0), features_audio.size(1), diff).type_as(feature_phone) + feature_phone = torch.cat((feature_phone, phone_pad_zero), dim=-1) + else: + feature_phone = feature_phone[:,:,:features_audio.size(-1)] + + else: + + phone_sequence = extra_text_phone_list[0] + phone_embedding = self.phone_embed(phone_sequence) + feature_phone = self.phone_conv(phone_embedding.transpose(1,2)) + features_audio = feature_phone.new_zeros(feature_phone.size(0), feature_phone.size(1), feature_phone.size(-1)) + features_video = feature_phone.new_zeros(feature_phone.size(0), feature_phone.size(1), feature_phone.size(-1)) + + mask_indices=None + padding_mask = torch.zeros(feature_phone.size(0), feature_phone.size(-1)).to(torch.bool).cuda() + + + + + modality_drop_prob, audio_drop_prob = np.random.random(), np.random.random() + if self.training: + if modality_drop_prob < self.modality_dropout: + if audio_drop_prob < self.audio_dropout: + features_audio = 0 * features_audio + else: + features_video = 0 * features_video + + + if self.modality_fuse == 'concat': + features = torch.cat([features_audio, features_video, feature_phone], dim=1) + elif self.modality_fuse == 'add': + features = features_audio + features_video + feature_phone + + + if target_list is not None: + features, mask_indices, target_list = self.forward_targets(features, mask_indices, target_list) + + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) # [B, T, 1536] + features = self.layer_norm(features) + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = self.dropout_input(features) + + if self.masking_type == 'feature' and mask: + x, mask_indices = self.apply_feature_mask(features, padding_mask, target_list) + else: + x = features + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, _ = self.encoder( + 
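            # The fused (and, when feature masking is enabled, masked) audio/video/phone
            # features are run through the shared Transformer encoder; output_layer is
            # 1-based, hence the "output_layer - 1" below.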
x, + padding_mask=padding_mask, + layer=None if output_layer is None else output_layer - 1 + ) # [B, T, 768] + + if features_only: + return {"x": x, "padding_mask": padding_mask, "features": features} + + label_embs_list = self.label_embs_concat.split(self.num_classes, 0) # list to tuple + proj_x = self.final_proj(x) + if self.untie_final_proj: # True + proj_x_list = proj_x.chunk(len(self.num_classes), dim=-1) + else: + proj_x_list = [proj_x for _ in self.num_classes] + logit_list = [self.compute_logits(proj, emb).view(-1, num_class) for proj, emb, num_class in zip(proj_x_list, label_embs_list, self.num_classes)] # [[B*T, V]] + mask, unmask = torch.logical_and(mask_indices, ~padding_mask).view(-1), torch.logical_and(~mask_indices, ~padding_mask).view(-1) # [B*T] + logit_m_list, logit_u_list = [logit[mask] for logit in logit_list], [logit[unmask] for logit in logit_list] + target_m_list, target_u_list = [target.view(-1)[mask].long() for target in target_list], [target.view(-1)[unmask].long() for target in target_list] + result = { + "logit_m_list": logit_m_list, + "logit_u_list": logit_u_list, + "target_m_list": target_m_list, + "target_u_list": target_u_list, + "padding_mask": padding_mask, + "features_pen": features_pen, + } + return result + + def extract_features( + self, + source: torch.Tensor, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = False, + ret_conv: bool = False, + output_layer: Optional[int] = None, + ) -> Tuple[torch.Tensor, torch.Tensor]: + res = self.forward( + source, + padding_mask=padding_mask, + mask=mask, + features_only=True, + output_layer=output_layer, + ) + feature = res["features"] if ret_conv else res["x"] + return feature, res["padding_mask"] + + def extract_finetune(self, source, padding_mask=None, mask=False, ret_conv=False, output_layer=None): + src_audio, src_video = source['audio'], source['video'] + if mask and self.masking_type == 'input': + src_video, mask_indices_video = self.apply_input_mask(src_video, padding_mask, target_list=None) + src_audio, mask_indices_audio = self.apply_input_mask(src_audio, padding_mask, target_list=None) + mask_indices = torch.logical_or(mask_indices_audio, mask_indices_video) # mask_indices not used in fine-tuning + else: + src_audio, src_video, mask_indices = src_audio, src_video, None + + if src_audio is not None and src_video is None: + features_audio = self.forward_features(src_audio, modality='audio') # features: [B, F, T] + features_video = features_audio.new_zeros(features_audio.size(0), self.encoder_embed_dim, features_audio.size(-1)) + feature_phone = features_audio.new_zeros(features_audio.size(0), features_audio.size(1), features_audio.size(-1)) + elif src_audio is None and src_video is not None: + features_video = self.forward_features(src_video, modality='video') + features_audio = features_video.new_zeros(features_video.size(0), self.encoder_embed_dim, features_video.size(-1)) + feature_phone = features_video.new_zeros(features_video.size(0), features_video.size(1), features_video.size(-1)) + elif src_audio is not None and src_video is not None: + features_video = self.forward_features(src_video, modality='video') + features_audio = self.forward_features(src_audio, modality='audio') # features: [B, F, T] + feature_phone = features_video.new_zeros(features_video.size(0), features_video.size(1), features_video.size(-1)) + + if self.modality_fuse == 'concat': + features = torch.cat([features_audio, features_video, feature_phone], dim=1) + elif self.modality_fuse == 'add': + features = 
features_audio + features_video + feature_phone + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) + features = self.layer_norm(features) + unmasked_features = features.clone() + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = self.dropout_input(features) + unmasked_features = self.dropout_features(unmasked_features) + x = features + mask_indices = None + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, _ = self.encoder( + x, + padding_mask=padding_mask, + layer=None if output_layer is None else output_layer - 1 + ) + + return x, padding_mask + + + def get_extra_losses(self, net_output): + extra_losses = [] + names = [] + if "features_pen" in net_output: + extra_losses.append(net_output["features_pen"]) + names.append("features_pen") + + return extra_losses, names + + def remove_pretraining_modules(self): + self.target_glu = None + self.final_proj = None + self.label_embs_concat = None + self.mask_emb = None + + def get_logits(self, net_output, is_masked=True): + raise NotImplementedError + + def get_targets(self, net_output, is_masked=True): + raise NotImplementedError + + def compute_nce(self, x, pos, negs): + neg_is_pos = (pos == negs).all(-1) + pos = pos.unsqueeze(0) + targets = torch.cat([pos, negs], dim=0) + + logits = torch.cosine_similarity( + x.float(), targets.float(), dim=-1 + ).type_as(x) + logits /= self.logit_temp + if neg_is_pos.any(): + logits[1:][neg_is_pos] = float("-inf") + logits = logits.transpose(0, 1) # (num_x, num_cls+1) + return logits diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/models/vathubert_asr.py b/SpeechT5/VATLM/vat_hubert/vathubert/models/vathubert_asr.py new file mode 100644 index 0000000000000000000000000000000000000000..a9902a9844c94e800ba2ef5967ed1993e81a9e48 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/models/vathubert_asr.py @@ -0,0 +1,481 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + + +import sys,logging +import contextlib +import tempfile +from argparse import Namespace +from typing import Any, Optional + +import torch +import torch.nn as nn +from dataclasses import dataclass, field +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models import BaseFairseqModel, FairseqEncoder, FairseqEncoderDecoderModel, register_model +from fairseq.models.hubert.hubert import MASKING_DISTRIBUTION_CHOICES +from fairseq.tasks import FairseqTask +from omegaconf import II, MISSING + +DBG=True if len(sys.argv) == 1 else False + +if DBG: + from vathubert.models.vathubert import VATHubertModel + from vathubert.models.decoder import TransformerDecoder +else: + from vathubert.models.vathubert import VATHubertModel + from 
vathubert.models.decoder import TransformerDecoder + +logger = logging.getLogger(__name__) + + +@dataclass +class VATHubertAsrConfig(FairseqDataclass): + w2v_path: str = field( + default=MISSING, metadata={"help": "path to hubert model"} + ) + no_pretrained_weights: bool = field( + default=False, + metadata={"help": "if true, does not load pretrained weights"}, + ) + dropout_input: float = field( + default=0.0, + metadata={"help": "dropout to apply to the input (after feat extr)"}, + ) + final_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout after transformer and before final projection" + }, + ) + dropout: float = field( + default=0.0, + metadata={"help": "dropout probability inside hubert model"}, + ) + attention_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability for attention weights " + "inside hubert model" + }, + ) + activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN " + "inside hubert model" + }, + ) + + # masking + apply_mask: bool = field( + default=False, metadata={"help": "apply masking during fine-tuning"} + ) + mask_length: int = field( + default=10, metadata={"help": "repeat the mask indices multiple times"} + ) + mask_prob: float = field( + default=0.5, + metadata={ + "help": "probability of replacing a token with mask " + "(normalized by length)" + }, + ) + mask_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", metadata={"help": "how to choose masks"} + ) + mask_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indices" + }, + ) + no_mask_overlap: bool = field( + default=False, metadata={"help": "whether to allow masks to overlap"} + ) + + # channel masking + mask_channel_length: int = field( + default=10, + metadata={"help": "length of the mask for features (channels)"}, + ) + mask_channel_prob: float = field( + default=0.0, + metadata={"help": "probability of replacing a feature with 0"}, + ) + mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field( + default="static", + metadata={"help": "how to choose mask length for channel masking"}, + ) + mask_channel_other: float = field( + default=0, + metadata={ + "help": "secondary mask argument " + "(used for more complex distributions), " + "see help in compute_mask_indices" + }, + ) + no_mask_channel_overlap: bool = field( + default=False, + metadata={"help": "whether to allow channel masks to overlap"}, + ) + freeze_finetune_updates: int = field( + default=0, + metadata={"help": "dont finetune hubert for this many updates"}, + ) + feature_grad_mult: float = field( + default=0.0, + metadata={"help": "reset feature grad mult in hubert to this"}, + ) + layerdrop: float = field( + default=0.0, + metadata={"help": "probability of dropping a layer in hubert"}, + ) + normalize: bool = II("task.normalize") + data: str = II("task.data") + + # this holds the loaded hubert args + w2v_args: Any = None + + +@dataclass +class VATHubertSeq2SeqConfig(VATHubertAsrConfig): + decoder_embed_dim: int = field( + default=768, metadata={"help": "decoder embedding dimension"} + ) + decoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "decoder embedding dimension for FFN"} + ) + decoder_layers: int = field( + default=6, metadata={"help": "num of decoder layers"} + ) + decoder_layerdrop: float = field( + default=0.0, metadata={"help": "decoder layerdrop chance"} + ) + 
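    # The decoder hyper-parameters below mirror those in VATHubertConfig and configure
    # the TransformerDecoder imported at the top of this file when the pre-trained
    # encoder is fine-tuned with an attention-based sequence-to-sequence decoder.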
decoder_attention_heads: int = field( + default=4, metadata={"help": "num decoder attention heads"} + ) + decoder_learned_pos: bool = field( + default=False, + metadata={"help": "use learned positional embeddings in the decoder"}, + ) + decoder_normalize_before: bool = field( + default=False, + metadata={"help": "apply layernorm before each decoder block"}, + ) + no_token_positional_embeddings: bool = field( + default=False, + metadata={ + "help": "if set, disables positional embeddings " + "(outside self attention)" + }, + ) + decoder_dropout: float = field( + default=0.0, metadata={"help": "dropout probability in the decoder"} + ) + decoder_attention_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability for attention weights " + "inside the decoder" + }, + ) + decoder_activation_dropout: float = field( + default=0.0, + metadata={ + "help": "dropout probability after activation in FFN " + "inside the decoder" + }, + ) + max_target_positions: int = field( + default=2048, metadata={"help": "max target positions"} + ) + share_decoder_input_output_embed: bool = field( + default=False, + metadata={"help": "share decoder input and output embeddings"}, + ) + no_scale_embedding: bool = field(default=True, metadata={'help': 'scale embedding'}) + +class HubertEncoder(FairseqEncoder): + def __init__(self, cfg: VATHubertAsrConfig, tgt_dict=None): + self.apply_mask = cfg.apply_mask + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + } + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu( + cfg.w2v_path, arg_overrides + ) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf( + w2v_args + ) + + assert cfg.normalize == w2v_args.task.normalize, ( + "Fine-tuning works best when data normalization is the same. 
" + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + w2v_args.task.data = cfg.data + + task = tasks.setup_task(w2v_args.task) + model = task.build_model(w2v_args.model) + + if state is not None and not cfg.no_pretrained_weights: + # set strict=False because we omit some modules + model.load_state_dict(state["model"], strict=False) + + model.remove_pretraining_modules() + + super().__init__(task.source_dictionary) + + d = model.encoder.embedding_dim + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.num_updates = 0 + + if tgt_dict is not None: + self.proj = Linear(d, len(tgt_dict)) + elif getattr(cfg, "decoder_embed_dim", d) != d: + self.proj = Linear(d, cfg.decoder_embed_dim) + else: + self.proj = None + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, source, padding_mask, tbc=True, **kwargs): + + w2v_args = { + "source": source, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + } + ft = self.freeze_finetune_updates <= self.num_updates + + with torch.no_grad() if not ft else contextlib.ExitStack(): + x, padding_mask = self.w2v_model.extract_finetune(**w2v_args) + + if tbc: + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + x = self.final_dropout(x) + + if self.proj: + x = self.proj(x) + + return { + "encoder_out": x, # T x B x C + "encoder_padding_mask": padding_mask, # B x T + "padding_mask": padding_mask, + } + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + encoder_out["encoder_out"] = encoder_out[ + "encoder_out" + ].index_select(1, new_order) + if encoder_out["encoder_padding_mask"] is not None: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(0, new_order) + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + + +class HubertEncoderWrapper(FairseqEncoder): + def __init__(self, w2v_model): + super().__init__(None) + self.w2v_model = w2v_model + + def forward(self, source, padding_mask, **kwargs): + w2v_args = { + "source": source, + "padding_mask": padding_mask, + } + + x, padding_mask = self.w2v_model.extract_finetune(**w2v_args) + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + return { + "encoder_out": x, # T x B x C + "encoder_padding_mask": padding_mask, # B x T + "padding_mask": padding_mask + } + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + encoder_out["encoder_out"] = encoder_out[ + "encoder_out" + ].index_select(1, new_order) + if encoder_out["encoder_padding_mask"] is not None: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(0, new_order) + if encoder_out["padding_mask"] is not None: + encoder_out["padding_mask"] = encoder_out[ + "padding_mask" + ].index_select(0, new_order) + return encoder_out + +@register_model("vat_hubert_seq2seq", dataclass=VATHubertSeq2SeqConfig) +class VATHubertSeq2Seq(FairseqEncoderDecoderModel): + def __init__(self, encoder, decoder, tgt_dict, cfg): + super().__init__(encoder, decoder) + self.cfg = cfg + self.freeze_finetune_updates = cfg.freeze_finetune_updates + + @classmethod + def 
build_model(cls, cfg, task): + """Build a new model instance.""" + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + } + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu( + cfg.w2v_path, arg_overrides + ) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf( + w2v_args + ) + + assert cfg.normalize == w2v_args.task.normalize, ( + "Fine-tuning works best when data normalization is the same. " + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + w2v_args.task.data = cfg.data + + task_pretrain = tasks.setup_task(w2v_args.task) + if state is not None: + task_pretrain.load_state_dict(state['task_state']) + + encoder_ = task_pretrain.build_model(w2v_args.model) + + encoder = HubertEncoderWrapper(encoder_) + if state is not None and not cfg.no_pretrained_weights: + # set strict=False because we omit some modules + del state['model']['mask_emb'] + del state['model']['label_embs_concat'] + + encoder.w2v_model.load_state_dict(state["model"], strict=False) + + encoder.w2v_model.remove_pretraining_modules() + + src_dict, tgt_dict = task.source_dictionary, task.target_dictionary + + def build_embedding(dictionary, embed_dim): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + emb = Embedding(num_embeddings, embed_dim, padding_idx=padding_idx) + return emb + + decoder_embed_tokens = build_embedding(tgt_dict, cfg.decoder_embed_dim) + decoder = TransformerDecoder(cfg, tgt_dict, decoder_embed_tokens) + + return VATHubertSeq2Seq(encoder, decoder, tgt_dict, cfg) + + + def forward(self, **kwargs): + ft = self.freeze_finetune_updates <= self.num_updates + with torch.no_grad() if not ft else contextlib.ExitStack(): + output = self.encoder(**kwargs) + decoder_out = self.decoder(prev_output_tokens=kwargs['prev_output_tokens'], encoder_out=output) + return decoder_out + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git 
a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/base_lrs3_finetune30_av.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/base_lrs3_finetune30_av.sh new file mode 100644 index 0000000000000000000000000000000000000000..422939bb222204506bf2f35774982dfa797aceac --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/base_lrs3_finetune30_av.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name base_lrs3_30h_av.yaml \ + task.data=/path/to/30h_data_tsv \ + task.label_dir=/path/to/30h_data_tsv \ + task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["audio","video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ + + + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/base_vox_finetune30_av.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/base_vox_finetune30_av.sh new file mode 100644 index 0000000000000000000000000000000000000000..5e8a9e55bdf7f6380dcf5611e8407a6a28b26604 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/base_vox_finetune30_av.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name base_vox_30h_av.yaml \ + task.data=/path/to/30h_data_tsv \ + task.label_dir=/path/to/30h_data_tsv \ + task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["audio","video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ + + + + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/base_vox_finetune433_av.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/base_vox_finetune433_av.sh new file mode 100644 index 0000000000000000000000000000000000000000..0a9ad419c779f7a170605c04be979892ea5332cc --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/base_vox_finetune433_av.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name base_vox_433h_av.yaml \ + task.data=/path/to/433h_data_tsv \ + task.label_dir=/path/to/433h_data_tsv \ + task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["audio","video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + 
distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ + + + + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/large_vox_finetune30_av.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/large_vox_finetune30_av.sh new file mode 100644 index 0000000000000000000000000000000000000000..fb849c3de0e5ea7ff89d26f9379529e5c90aca11 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/large_vox_finetune30_av.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name large_vox_30h_av.yaml \ + task.data=/path/to/30h_data_tsv \ + task.label_dir=/path/to/30h_data_tsv \ + task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["audio","video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ \ No newline at end of file diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/large_vox_finetune433_av.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/large_vox_finetune433_av.sh new file mode 100644 index 0000000000000000000000000000000000000000..47668d093dce01191a5ddcaae0dd469dd3537f37 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_avsr/large_vox_finetune433_av.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name large_vox_433h_av.yaml \ + task.data=/path/to/433h_data_tsv \ + task.label_dir=/path/to/433h_data_tsv \ + task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["audio","video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ + + + + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/base_lrs3_finetune30_v.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/base_lrs3_finetune30_v.sh new file mode 100644 index 0000000000000000000000000000000000000000..56ac2375c27fdc3c6999116ea74565af773267c9 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/base_lrs3_finetune30_v.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name base_lrs3_30h_v.yaml \ + task.data=/path/to/30h_data_tsv \ + task.label_dir=/path/to/30h_data_tsv \ + 
task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ + + + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/base_vox_finetune30_v.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/base_vox_finetune30_v.sh new file mode 100644 index 0000000000000000000000000000000000000000..d3f1b13feba0701f629e66409a0cc082ecd0d7ac --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/base_vox_finetune30_v.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name base_vox_30h_v.yaml \ + task.data=/path/to/30h_data_tsv \ + task.label_dir=/path/to/30h_data_tsv \ + task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ + + + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/base_vox_finetune433_v.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/base_vox_finetune433_v.sh new file mode 100644 index 0000000000000000000000000000000000000000..4943d8a22268b4e97ba047c7cca18208fecef4fa --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/base_vox_finetune433_v.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name base_vox_433h_v.yaml \ + task.data=/path/to/433h_data_tsv \ + task.label_dir=/path/to/433h_data_tsv \ + task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ + + + + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/large_vox_finetune30_v.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/large_vox_finetune30_v.sh new file mode 100644 index 0000000000000000000000000000000000000000..c36d01767ea058ae3b086f486bd1043a643ae470 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/large_vox_finetune30_v.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python 
/path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name large_vox_30h_v.yaml \ + task.data=/path/to/30h_data_tsv \ + task.label_dir=/path/to/30h_data_tsv \ + task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ + + + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/large_vox_finetune433_v.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/large_vox_finetune433_v.sh new file mode 100644 index 0000000000000000000000000000000000000000..275222c30c0af19b3788b98e2c6e9a7b62e1f0c3 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/finetune_vsr/large_vox_finetune433_v.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +ngpu=$1 +updatefreq=$2 +max_tokens=$3 +pretrained_model_path=$4 +save_path=$5 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/finetune --config-name large_vox_433h_v.yaml \ + task.data=/path/to/433h_data_tsv \ + task.label_dir=/path/to/433h_data_tsv \ + task.tokenizer_bpe_model=/path/to/sentencepiece/model \ + task.modalities=["video"] \ + model.w2v_path=${pretrained_model_path} \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + distributed_training.ddp_backend="no_c10d" \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=${max_tokens} \ + +task.use_supervised_data=False \ + +task.use_extra_textdata=False \ + +task.use_extra_audiodata=False \ + + + + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/pretrain/base_lsr3_pretrain_iter5.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/pretrain/base_lsr3_pretrain_iter5.sh new file mode 100644 index 0000000000000000000000000000000000000000..bb9d03cd6272b5b2c52aff645c0cf6580379f473 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/pretrain/base_lsr3_pretrain_iter5.sh @@ -0,0 +1,31 @@ +#!/bin/bash +ngpu=$1 +updatefreq=$2 +datapath=/LocalData/vatlm_related/fbankdata +save_path=$3 + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/pretrain --config-name base_lrs3_iter5.yaml \ + task.data=${datapath}/433pre_lrs3_433h_tsv \ + task.label_dir=${datapath}/433pre_lrs3_433h_tsv \ + +task.sup_data_path=${datapath}/433pre_tedv3_phone_concat_tsv2 \ + +task.sup_manifest=${datapath}/433pre_tedv3_phone_concat_tsv2 \ + +task.onlytext_manifest=${datapath}/433pre_cantab_tsv \ + +task.onlyaudio_manifest=${datapath}/433pre_giga_tsv_km \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=3000 \ + model.label_rate=25 \ + common.log_interval=200 \ + checkpoint.save_interval=5 \ + +task.sample_distributions=\"0.08,0.1,0.15,0.15\" \ + +criterion.banlance_loss_weights=[1.0,1.0] \ + dataset.data_buffer_size=40 \ + +task.use_supervised_data=True \ + +task.use_extra_textdata=True \ + +task.use_extra_audiodata=True \ + + + \ No newline at end of file diff 
--git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/pretrain/base_vox_pretrain_iter5.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/pretrain/base_vox_pretrain_iter5.sh new file mode 100644 index 0000000000000000000000000000000000000000..221588a16fcef4ffc084d59cdbee8c04171ac023 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/pretrain/base_vox_pretrain_iter5.sh @@ -0,0 +1,30 @@ +#!/bin/bash +ngpu=$1 +updatefreq=$2 +datapath=/LocalData/vatlm_related/fbankdata +save_path=$3 + + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/pretrain --config-name base_vox_iter5.yaml \ + task.data=${datapath}/fbank_lrs3_vox_tsv \ + task.label_dir=${datapath}/fbank_lrs3_vox_tsv \ + +task.sup_data_path=${datapath}/fbank_tedv3_phone_concat_vox_tsv \ + +task.sup_manifest=${datapath}/fbank_tedv3_phone_concat_vox_tsv \ + +task.onlytext_manifest=${datapath}/cantab2_vox_tsv \ + +task.onlyaudio_manifest=${datapath}/fbank_giga_vox_tsv_km \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=3000 \ + model.label_rate=25 \ + common.log_interval=200 \ + checkpoint.save_interval=5 \ + +task.sample_distributions=\"0.13,0.15,0.32,0.3\" \ + +criterion.banlance_loss_weights=[1.0,1.0] \ + dataset.data_buffer_size=40 \ + +task.use_supervised_data=True \ + +task.use_extra_textdata=True \ + +task.use_extra_audiodata=True \ + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/scripts/pretrain/large_vox_pretrain_iter5.sh b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/pretrain/large_vox_pretrain_iter5.sh new file mode 100644 index 0000000000000000000000000000000000000000..064f9ce14bc2c54809f060ad997664913d95bbfa --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/scripts/pretrain/large_vox_pretrain_iter5.sh @@ -0,0 +1,31 @@ +#!/bin/bash +unset WORLD_SIZE +ngpu=$1 +updatefreq=$2 +datapath=/LocalData/vatlm_related/fbankdata +save_path=$3 + + +python /path/to/fairseq/fairseq_cli/hydra_train.py \ + --config-dir /path/to/vat_hubert/vathubert/conf/pretrain --config-name large_vox_iter5.yaml \ + task.data=${datapath}/fbank_lrs3_vox_tsv \ + task.label_dir=${datapath}/fbank_lrs3_vox_tsv \ + +task.sup_data_path=${datapath}/fbank_tedv3_phone_concat_vox_tsv \ + +task.sup_manifest=${datapath}/fbank_tedv3_phone_concat_vox_tsv \ + +task.onlytext_manifest=${datapath}/cantab2_vox_tsv \ + +task.onlyaudio_manifest=${datapath}/fbank_giga_vox_tsv_km \ + hydra.run.dir=${save_path} \ + common.user_dir=/path/to/vat_hubert/vathubert \ + distributed_training.distributed_world_size=${ngpu} \ + optimization.update_freq=[${updatefreq}] \ + dataset.max_tokens=3000 \ + model.label_rate=25 \ + common.log_interval=200 \ + checkpoint.save_interval=5 \ + +task.sample_distributions=\"0.13,0.15,0.32,0.3\" \ + +criterion.banlance_loss_weights=[1.0,1.0] \ + dataset.data_buffer_size=40 \ + +task.use_supervised_data=True \ + +task.use_extra_textdata=True \ + +task.use_extra_audiodata=True \ + diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/sequence_generator.py b/SpeechT5/VATLM/vat_hubert/vathubert/sequence_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..49cfa7a5125a5e32d40693a6367dfb7aa4cad703 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/sequence_generator.py @@ -0,0 +1,988 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with 
Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import math +from typing import Dict, List, Optional +import sys + +import torch +import torch.nn as nn +from fairseq import search, utils +from fairseq.data import data_utils +from fairseq.models import FairseqIncrementalDecoder +from torch import Tensor +from fairseq.ngram_repeat_block import NGramRepeatBlock + + +class SequenceGenerator(nn.Module): + def __init__( + self, + models, + tgt_dict, + beam_size=1, + max_len_a=0, + max_len_b=200, + max_len=0, + min_len=1, + normalize_scores=True, + len_penalty=1.0, + unk_penalty=0.0, + temperature=1.0, + match_source_len=False, + no_repeat_ngram_size=0, + search_strategy=None, + eos=None, + symbols_to_strip_from_output=None, + lm_model=None, + lm_weight=1.0, + ): + """Generates translations of a given source sentence. + + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models, + currently support fairseq.models.TransformerModel for scripting + beam_size (int, optional): beam width (default: 1) + max_len_a/b (int, optional): generate sequences of maximum length + ax + b, where x is the source length + max_len (int, optional): the maximum length of the generated output + (not including end-of-sentence) + min_len (int, optional): the minimum length of the generated output + (not including end-of-sentence) + normalize_scores (bool, optional): normalize scores by the length + of the output (default: True) + len_penalty (float, optional): length penalty, where <1.0 favors + shorter, >1.0 favors longer sentences (default: 1.0) + unk_penalty (float, optional): unknown word penalty, where <0 + produces more unks, >0 produces fewer (default: 0.0) + temperature (float, optional): temperature, where values + >1.0 produce more uniform samples and values <1.0 produce + sharper samples (default: 1.0) + match_source_len (bool, optional): outputs should match the source + length (default: False) + """ + super().__init__() + if isinstance(models, EnsembleModel): + self.model = models + else: + self.model = EnsembleModel(models) + self.tgt_dict = tgt_dict + self.pad = tgt_dict.pad() + self.unk = tgt_dict.unk() + self.eos = tgt_dict.eos() if eos is None else eos + self.symbols_to_strip_from_output = ( + symbols_to_strip_from_output.union({self.eos}) + if symbols_to_strip_from_output is not None + else {self.eos} + ) + self.vocab_size = len(tgt_dict) + self.beam_size = beam_size + # the max beam size is the dictionary size - 1, since we never select pad + self.beam_size = min(beam_size, self.vocab_size - 1) + self.max_len_a = max_len_a + self.max_len_b = max_len_b + self.min_len = min_len + self.max_len = max_len or self.model.max_decoder_positions() + + self.normalize_scores = normalize_scores + self.len_penalty = len_penalty + self.unk_penalty = unk_penalty + self.temperature = temperature + self.match_source_len = match_source_len + + if no_repeat_ngram_size > 0: + self.repeat_ngram_blocker = NGramRepeatBlock(no_repeat_ngram_size) + else: + self.repeat_ngram_blocker = None + + assert temperature > 0, "--temperature must be greater than 0" + + self.search = ( + search.BeamSearch(tgt_dict) if search_strategy is 
None else search_strategy + ) + # We only need to set src_lengths in LengthConstrainedBeamSearch. + # As a module attribute, setting it would break in multithread + # settings when the model is shared. + self.should_set_src_lengths = ( + hasattr(self.search, "needs_src_lengths") and self.search.needs_src_lengths + ) + + self.model.eval() + + self.lm_model = lm_model + self.lm_weight = lm_weight + if self.lm_model is not None: + self.lm_model.eval() + + def cuda(self): + self.model.cuda() + return self + + @torch.no_grad() + def forward( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + """Generate a batch of translations. + + Args: + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, prefix_tokens, bos_token=bos_token) + + # TODO(myleott): unused, deprecate after pytorch-translate migration + def generate_batched_itr(self, data_itr, beam_size=None, cuda=False, timer=None): + """Iterate over a batched dataset and yield individual translations. + Args: + cuda (bool, optional): use GPU for generation + timer (StopwatchMeter, optional): time generations + """ + for sample in data_itr: + s = utils.move_to_cuda(sample) if cuda else sample + if "net_input" not in s: + continue + input = s["net_input"] + # model.forward normally channels prev_output_tokens into the decoder + # separately, but SequenceGenerator directly calls model.encoder + encoder_input = { + k: v for k, v in input.items() if k != "prev_output_tokens" + } + if timer is not None: + timer.start() + with torch.no_grad(): + hypos = self.generate(encoder_input) + if timer is not None: + timer.stop(sum(len(h[0]["tokens"]) for h in hypos)) + for i, id in enumerate(s["id"].data): + # remove padding + src = utils.strip_pad(input["src_tokens"].data[i, :], self.pad) + ref = ( + utils.strip_pad(s["target"].data[i, :], self.pad) + if s["target"] is not None + else None + ) + yield id, src, ref, hypos[i] + + @torch.no_grad() + def generate(self, models, sample: Dict[str, Dict[str, Tensor]], **kwargs) -> List[List[Dict[str, Tensor]]]: + """Generate translations. Match the api of other fairseq generators. 
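+ The models argument is not used here; decoding runs on the ensemble wrapped in __init__, and the call is forwarded to _generate.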
+ + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + constraints (torch.LongTensor, optional): force decoder to include + the list of constraints + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, **kwargs) + + def _generate( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + constraints: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + incremental_states = torch.jit.annotate( + List[Dict[str, Dict[str, Optional[Tensor]]]], + [ + torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {}) + for i in range(self.model.models_size) + ], + ) + net_input = sample["net_input"] + + if "src_tokens" in net_input: + src_tokens = net_input["src_tokens"] + # length of the source text being the character length except EndOfSentence and pad + src_lengths = ( + (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1) + ) + elif "source" in net_input: + src_tokens = net_input["source"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + elif "features" in net_input: + src_tokens = net_input["features"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + else: + raise Exception("expected src_tokens or source in net input. input keys: " + str(net_input.keys())) + + # bsz: total number of sentences in beam + # Note that src_tokens may have more than 2 dimensions (i.e. audio features) + if src_tokens['audio'] is not None: + bsz, src_len = src_tokens['audio'].size()[:2] + src_device = src_tokens['audio'].device + else: + bsz, src_len = net_input['padding_mask'].size() + src_device = src_tokens['video'].device + beam_size = self.beam_size + if constraints is not None and not self.search.supports_constraints: + raise NotImplementedError( + "Target-side constraints were provided, but search method doesn't support them" + ) + + # Initialize constraints, when active + self.search.init_constraints(constraints, beam_size) + + max_len: int = -1 + if self.match_source_len: + max_len = src_lengths.max().item() + else: + max_len = min( + int(self.max_len_a * src_len + self.max_len_b), + self.max_len - 1, + ) + assert ( + self.min_len <= max_len + ), "min_len cannot be larger than max_len, please adjust these!" + # compute the encoder output for each beam + encoder_outs = self.model.forward_encoder(net_input) + + # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores + new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1) + new_order = new_order.to(src_device).long() + encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order) + # ensure encoder_outs is a List. 
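+ # NOTE: from this point on hypotheses live in a flat batch of size bsz * beam_size; an index of the form bbsz = sent_idx * beam_size + beam_idx addresses a single beam item, which is why the buffers below are allocated with a leading dimension of bsz * beam_size.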
+ assert encoder_outs is not None + + # initialize buffers + scores = ( + torch.zeros(bsz * beam_size, max_len + 1).to(src_device).float() + ) # +1 for eos; pad is never chosen for scoring + tokens = ( + torch.zeros(bsz * beam_size, max_len + 2) + .to(src_device) + .long() + .fill_(self.pad) + ) # +2 for eos and pad + tokens[:, 0] = self.eos if bos_token is None else bos_token + attn: Optional[Tensor] = None + + # A list that indicates candidates that should be ignored. + # For example, suppose we're sampling and have already finalized 2/5 + # samples. Then cands_to_ignore would mark 2 positions as being ignored, + # so that we only finalize the remaining 3 samples. + cands_to_ignore = ( + torch.zeros(bsz, beam_size).to(src_device).eq(-1) + ) # forward and backward-compatible False mask + + # list of completed sentences + finalized = torch.jit.annotate( + List[List[Dict[str, Tensor]]], + [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)], + ) # contains lists of dictionaries of infomation about the hypothesis being finalized at each step + + # a boolean array indicating if the sentence at the index is finished or not + finished = [False for i in range(bsz)] + num_remaining_sent = bsz # number of sentences remaining + + # number of candidate hypos per step + cand_size = 2 * beam_size # 2 x beam size in case half are EOS + + # offset arrays for converting between different indexing schemes + bbsz_offsets = ( + (torch.arange(0, bsz) * beam_size) + .unsqueeze(1) + .type_as(tokens) + .to(src_device) + ) + cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_device) + + reorder_state: Optional[Tensor] = None + batch_idxs: Optional[Tensor] = None + + original_batch_idxs: Optional[Tensor] = None + if "id" in sample and isinstance(sample["id"], Tensor): + original_batch_idxs = sample["id"] + else: + original_batch_idxs = torch.arange(0, bsz).type_as(tokens) + + for step in range(max_len + 1): # one extra step for EOS marker + # reorder decoder internal states based on the prev choice of beams + if reorder_state is not None: + if batch_idxs is not None: + # update beam indices to take into account removed sentences + corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as( + batch_idxs + ) + reorder_state.view(-1, beam_size).add_( + corr.unsqueeze(-1) * beam_size + ) + original_batch_idxs = original_batch_idxs[batch_idxs] + self.model.reorder_incremental_state(incremental_states, reorder_state) + encoder_outs = self.model.reorder_encoder_out( + encoder_outs, reorder_state + ) + + lprobs, avg_attn_scores = self.model.forward_decoder( + tokens[:, : step + 1], + encoder_outs, + incremental_states, + self.temperature, + ) + + if self.lm_model is not None: + lm_out = self.lm_model(tokens[:, : step + 1]) + probs = self.lm_model.get_normalized_probs( + lm_out, log_probs=True, sample=None + ) + probs = probs[:, -1, :] * self.lm_weight + lprobs += probs + + lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs) + + lprobs[:, self.pad] = -math.inf # never select pad + lprobs[:, self.unk] -= self.unk_penalty # apply unk penalty + + # handle max length constraint + if step >= max_len: + lprobs[:, : self.eos] = -math.inf + lprobs[:, self.eos + 1 :] = -math.inf + + # handle prefix tokens (possibly with different lengths) + if ( + prefix_tokens is not None + and step < prefix_tokens.size(1) + and step < max_len + ): + lprobs, tokens, scores = self._prefix_tokens( + step, lprobs, scores, tokens, prefix_tokens, beam_size + ) + elif step < self.min_len: + # minimum length 
constraint (does not apply if using prefix_tokens) + lprobs[:, self.eos] = -math.inf + + # Record attention scores, only support avg_attn_scores is a Tensor + if avg_attn_scores is not None: + if attn is None: + attn = torch.empty( + bsz * beam_size, avg_attn_scores.size(1), max_len + 2 + ).to(scores) + attn[:, :, step + 1].copy_(avg_attn_scores) + + scores = scores.type_as(lprobs) + eos_bbsz_idx = torch.empty(0).to( + tokens + ) # indices of hypothesis ending with eos (finished sentences) + eos_scores = torch.empty(0).to( + scores + ) # scores of hypothesis ending with eos (finished sentences) + + if self.should_set_src_lengths: + self.search.set_src_lengths(src_lengths) + + if self.repeat_ngram_blocker is not None: + lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz, beam_size, step) + + # Shape: (batch, cand_size) + cand_scores, cand_indices, cand_beams = self.search.step( + step, + lprobs.view(bsz, -1, self.vocab_size), + scores.view(bsz, beam_size, -1)[:, :, :step], + tokens[:, : step + 1], + original_batch_idxs, + ) + + # cand_bbsz_idx contains beam indices for the top candidate + # hypotheses, with a range of values: [0, bsz*beam_size), + # and dimensions: [bsz, cand_size] + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + + # finalize hypotheses that end in eos + # Shape of eos_mask: (batch size, beam size) + eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf) + eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask) + + # only consider eos when it's among the top beam_size indices + # Now we know what beam item(s) to finish + # Shape: 1d list of absolute-numbered + eos_bbsz_idx = torch.masked_select( + cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents: List[int] = [] + if eos_bbsz_idx.numel() > 0: + eos_scores = torch.masked_select( + cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents = self.finalize_hypos( + step, + eos_bbsz_idx, + eos_scores, + tokens, + scores, + finalized, + finished, + beam_size, + attn, + src_lengths, + max_len, + ) + num_remaining_sent -= len(finalized_sents) + + assert num_remaining_sent >= 0 + if num_remaining_sent == 0: + break + if self.search.stop_on_max_len and step >= max_len: + break + assert step < max_len, f"{step} < {max_len}" + + # Remove finalized sentences (ones for which {beam_size} + # finished hypotheses have been generated) from the batch. 
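+ # NOTE: the surviving sentences are compacted into a smaller batch below; batch_idxs records which rows were kept so that reorder_state can be remapped at the top of the next step.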
+ if len(finalized_sents) > 0: + new_bsz = bsz - len(finalized_sents) + + # construct batch_idxs which holds indices of batches to keep for the next pass + batch_mask = torch.ones( + bsz, dtype=torch.bool, device=cand_indices.device + ) + batch_mask[finalized_sents] = False + # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it + batch_idxs = torch.arange( + bsz, device=cand_indices.device + ).masked_select(batch_mask) + + # Choose the subset of the hypothesized constraints that will continue + self.search.prune_sentences(batch_idxs) + + eos_mask = eos_mask[batch_idxs] + cand_beams = cand_beams[batch_idxs] + bbsz_offsets.resize_(new_bsz, 1) + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + cand_scores = cand_scores[batch_idxs] + cand_indices = cand_indices[batch_idxs] + + if prefix_tokens is not None: + prefix_tokens = prefix_tokens[batch_idxs] + src_lengths = src_lengths[batch_idxs] + cands_to_ignore = cands_to_ignore[batch_idxs] + + scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + if attn is not None: + attn = attn.view(bsz, -1)[batch_idxs].view( + new_bsz * beam_size, attn.size(1), -1 + ) + bsz = new_bsz + else: + batch_idxs = None + + # Set active_mask so that values > cand_size indicate eos hypos + # and values < cand_size indicate candidate active hypos. + # After, the min values per row are the top candidate active hypos + + # Rewrite the operator since the element wise or is not supported in torchscript. + + eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size])) + active_mask = torch.add( + eos_mask.type_as(cand_offsets) * cand_size, + cand_offsets[: eos_mask.size(1)], + ) + + # get the top beam_size active hypotheses, which are just + # the hypos with the smallest values in active_mask. + # {active_hypos} indicates which {beam_size} hypotheses + # from the list of {2 * beam_size} candidates were + # selected. Shapes: (batch size, beam size) + new_cands_to_ignore, active_hypos = torch.topk( + active_mask, k=beam_size, dim=1, largest=False + ) + + # update cands_to_ignore to ignore any finalized hypos. + cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size] + # Make sure there is at least one active item for each sentence in the batch. + assert (~cands_to_ignore).any(dim=1).all() + + # update cands_to_ignore to ignore any finalized hypos + + # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam + # can be selected more than once). 
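+ # NOTE: active_hypos indexes into the 2 * beam_size candidate list, so the gathers below map the selected candidates back into the beam_size slots kept for the next step.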
+ active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos) + active_scores = torch.gather(cand_scores, dim=1, index=active_hypos) + + active_bbsz_idx = active_bbsz_idx.view(-1) + active_scores = active_scores.view(-1) + + # copy tokens and scores for active hypotheses + + # Set the tokens for each beam (can select the same row more than once) + tokens[:, : step + 1] = torch.index_select( + tokens[:, : step + 1], dim=0, index=active_bbsz_idx + ) + # Select the next token for each of them + tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather( + cand_indices, dim=1, index=active_hypos + ) + if step > 0: + scores[:, :step] = torch.index_select( + scores[:, :step], dim=0, index=active_bbsz_idx + ) + scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather( + cand_scores, dim=1, index=active_hypos + ) + + # Update constraints based on which candidates were selected for the next beam + self.search.update_constraints(active_hypos) + + # copy attention for active hypotheses + if attn is not None: + attn[:, :, : step + 2] = torch.index_select( + attn[:, :, : step + 2], dim=0, index=active_bbsz_idx + ) + + # reorder incremental state in decoder + reorder_state = active_bbsz_idx + + # sort by score descending + for sent in range(len(finalized)): + scores = torch.tensor( + [float(elem["score"].item()) for elem in finalized[sent]] + ) + _, sorted_scores_indices = torch.sort(scores, descending=True) + finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices] + finalized[sent] = torch.jit.annotate( + List[Dict[str, Tensor]], finalized[sent] + ) + return finalized + + def _prefix_tokens( + self, step: int, lprobs, scores, tokens, prefix_tokens, beam_size: int + ): + """Handle prefix tokens""" + prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat(1, beam_size).view(-1) + prefix_lprobs = lprobs.gather(-1, prefix_toks.unsqueeze(-1)) + prefix_mask = prefix_toks.ne(self.pad) + lprobs[prefix_mask] = torch.tensor(-math.inf).to(lprobs) + lprobs[prefix_mask] = lprobs[prefix_mask].scatter( + -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_lprobs[prefix_mask] + ) + # if prefix includes eos, then we should make sure tokens and + # scores are the same across all beams + eos_mask = prefix_toks.eq(self.eos) + if eos_mask.any(): + # validate that the first beam matches the prefix + first_beam = tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[ + :, 0, 1 : step + 1 + ] + eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0] + target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step] + assert (first_beam == target_prefix).all() + + # copy tokens, scores and lprobs from the first beam to all beams + tokens = self.replicate_first_beam(tokens, eos_mask_batch_dim, beam_size) + scores = self.replicate_first_beam(scores, eos_mask_batch_dim, beam_size) + lprobs = self.replicate_first_beam(lprobs, eos_mask_batch_dim, beam_size) + return lprobs, tokens, scores + + def replicate_first_beam(self, tensor, mask, beam_size: int): + tensor = tensor.view(-1, beam_size, tensor.size(-1)) + tensor[mask] = tensor[mask][:, :1, :] + return tensor.view(-1, tensor.size(-1)) + + def finalize_hypos( + self, + step: int, + bbsz_idx, + eos_scores, + tokens, + scores, + finalized: List[List[Dict[str, Tensor]]], + finished: List[bool], + beam_size: int, + attn: Optional[Tensor], + src_lengths, + max_len: int, + ): + """Finalize hypothesis, store finalized information in `finalized`, and change `finished` accordingly. 
+ A sentence is finalized when {beam_size} finished items have been collected for it. + + Returns number of sentences (not beam items) being finalized. + These will be removed from the batch and not processed further. + Args: + bbsz_idx (Tensor): + """ + assert bbsz_idx.numel() == eos_scores.numel() + + # clone relevant token and attention tensors. + # tokens is (batch * beam, max_len). So the index_select + # gets the newly EOS rows, then selects cols 1..{step + 2} + tokens_clone = tokens.index_select(0, bbsz_idx)[ + :, 1 : step + 2 + ] # skip the first index, which is EOS + + tokens_clone[:, step] = self.eos + attn_clone = ( + attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2] + if attn is not None + else None + ) + + # compute scores per token position + pos_scores = scores.index_select(0, bbsz_idx)[:, : step + 1] + pos_scores[:, step] = eos_scores + # convert from cumulative to per-position scores + pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] + + # normalize sentence-level scores + if self.normalize_scores: + eos_scores /= (step + 1) ** self.len_penalty + + # cum_unfin records which sentences in the batch are finished. + # It helps match indexing between (a) the original sentences + # in the batch and (b) the current, possibly-reduced set of + # sentences. + cum_unfin: List[int] = [] + prev = 0 + for f in finished: + if f: + prev += 1 + else: + cum_unfin.append(prev) + + # The keys here are of the form "{sent}_{unfin_idx}", where + # "unfin_idx" is the index in the current (possibly reduced) + # list of sentences, and "sent" is the index in the original, + # unreduced batch + # set() is not supported in script export + sents_seen: Dict[str, Optional[Tensor]] = {} + + # For every finished beam item + for i in range(bbsz_idx.size()[0]): + idx = bbsz_idx[i] + score = eos_scores[i] + # sentence index in the current (possibly reduced) batch + unfin_idx = idx // beam_size + # sentence index in the original (unreduced) batch + sent = unfin_idx + cum_unfin[unfin_idx] + # Cannot create dict for key type '(int, int)' in torchscript. + # The workaround is to cast int to string + seen = str(sent.item()) + "_" + str(unfin_idx.item()) + if seen not in sents_seen: + sents_seen[seen] = None + + if self.match_source_len and step > src_lengths[unfin_idx]: + score = torch.tensor(-math.inf).to(score) + + # An input sentence (among those in a batch) is finished when + # beam_size hypotheses have been collected for it + if len(finalized[sent]) < beam_size: + if attn_clone is not None: + # remove padding tokens from attn scores + hypo_attn = attn_clone[i] + else: + hypo_attn = torch.empty(0) + + finalized[sent].append( + { + "tokens": tokens_clone[i], + "score": score, + "attention": hypo_attn, # src_len x tgt_len + "alignment": torch.empty(0), + "positional_scores": pos_scores[i], + } + ) + + newly_finished: List[int] = [] + + for seen in sents_seen.keys(): + # check termination conditions for this sentence + sent: int = int(float(seen.split("_")[0])) + unfin_idx: int = int(float(seen.split("_")[1])) + + if not finished[sent] and self.is_finished( + step, unfin_idx, max_len, len(finalized[sent]), beam_size + ): + finished[sent] = True + newly_finished.append(unfin_idx) + + return newly_finished + + def is_finished( + self, + step: int, + unfin_idx: int, + max_len: int, + finalized_sent_len: int, + beam_size: int, + ): + """ + Check whether decoding for a sentence is finished, which + occurs when the list of finalized sentences has reached the + beam size, or when we reach the maximum length. 
+ """ + assert finalized_sent_len <= beam_size + if finalized_sent_len == beam_size or step == max_len: + return True + return False + + +class EnsembleModel(nn.Module): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__() + self.models_size = len(models) + # method '__len__' is not supported in ModuleList for torch script + self.single_model = models[0] + self.models = nn.ModuleList(models) + + self.has_incremental: bool = False + if all( + hasattr(m, "decoder") and isinstance(m.decoder, FairseqIncrementalDecoder) + for m in models + ): + self.has_incremental = True + + def forward(self): + pass + + def has_encoder(self): + return hasattr(self.single_model, "encoder") + + def has_incremental_states(self): + return self.has_incremental + + def max_decoder_positions(self): + return min([m.max_decoder_positions() for m in self.models if hasattr(m, "max_decoder_positions")] + [sys.maxsize]) + + @torch.jit.export + def forward_encoder(self, net_input: Dict[str, Tensor]): + if not self.has_encoder(): + return None + return [model.encoder.forward_torchscript(net_input) for model in self.models] + + @torch.jit.export + def forward_decoder( + self, + tokens, + encoder_outs: List[Dict[str, List[Tensor]]], + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + temperature: float = 1.0, + ): + log_probs = [] + avg_attn: Optional[Tensor] = None + encoder_out: Optional[Dict[str, List[Tensor]]] = None + for i, model in enumerate(self.models): + if self.has_encoder(): + encoder_out = encoder_outs[i] + # decode each model + if self.has_incremental_states(): + decoder_out = model.decoder.forward( + tokens, + encoder_out=encoder_out, + incremental_state=incremental_states[i], + ) + else: + if hasattr(model, "decoder"): + decoder_out = model.decoder.forward(tokens, encoder_out=encoder_out) + else: + decoder_out = model.forward(tokens) + + attn: Optional[Tensor] = None + decoder_len = len(decoder_out) + if decoder_len > 1 and decoder_out[1] is not None: + if isinstance(decoder_out[1], Tensor): + attn = decoder_out[1] + else: + attn_holder = decoder_out[1]["attn"] + if isinstance(attn_holder, Tensor): + attn = attn_holder + elif attn_holder is not None: + attn = attn_holder[0] + if attn is not None: + attn = attn[:, -1, :] + + decoder_out_tuple = ( + decoder_out[0][:, -1:, :].div_(temperature), + None if decoder_len <= 1 else decoder_out[1], + ) + probs = model.get_normalized_probs( + decoder_out_tuple, log_probs=True, sample=None + ) + probs = probs[:, -1, :] + if self.models_size == 1: + return probs, attn + + log_probs.append(probs) + if attn is not None: + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + + avg_probs = torch.logsumexp(torch.stack(log_probs, dim=0), dim=0) - math.log( + self.models_size + ) + + if avg_attn is not None: + avg_attn.div_(self.models_size) + return avg_probs, avg_attn + + @torch.jit.export + def reorder_encoder_out( + self, encoder_outs: Optional[List[Dict[str, List[Tensor]]]], new_order + ): + """ + Reorder encoder output according to *new_order*. 
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + new_outs: List[Dict[str, List[Tensor]]] = [] + if not self.has_encoder(): + return new_outs + for i, model in enumerate(self.models): + assert encoder_outs is not None + new_outs.append( + model.encoder.reorder_encoder_out(encoder_outs[i], new_order) + ) + return new_outs + + @torch.jit.export + def reorder_incremental_state( + self, + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + new_order, + ): + if not self.has_incremental_states(): + return + for i, model in enumerate(self.models): + model.decoder.reorder_incremental_state_scripting( + incremental_states[i], new_order + ) + + +class SequenceGeneratorWithAlignment(SequenceGenerator): + def __init__( + self, models, tgt_dict, left_pad_target=False, print_alignment="hard", **kwargs + ): + """Generates translations of a given source sentence. + + Produces alignments following "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + left_pad_target (bool, optional): Whether or not the + hypothesis should be left padded or not when they are + teacher forced for generating alignments. + """ + super().__init__(EnsembleModelWithAlignment(models), tgt_dict, **kwargs) + self.left_pad_target = left_pad_target + + if print_alignment == "hard": + self.extract_alignment = utils.extract_hard_alignment + elif print_alignment == "soft": + self.extract_alignment = utils.extract_soft_alignment + + @torch.no_grad() + def generate(self, models, sample, **kwargs): + finalized = super()._generate(sample, **kwargs) + + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + beam_size = self.beam_size + ( + src_tokens, + src_lengths, + prev_output_tokens, + tgt_tokens, + ) = self._prepare_batch_for_alignment(sample, finalized) + if any(getattr(m, "full_context_alignment", False) for m in self.model.models): + attn = self.model.forward_align(src_tokens, src_lengths, prev_output_tokens) + else: + attn = [ + finalized[i // beam_size][i % beam_size]["attention"].transpose(1, 0) + for i in range(bsz * beam_size) + ] + + if src_tokens.device != "cpu": + src_tokens = src_tokens.to("cpu") + tgt_tokens = tgt_tokens.to("cpu") + attn = [i.to("cpu") for i in attn] + + # Process the attn matrix to extract hard alignments. 
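+ # NOTE: extract_alignment is either utils.extract_hard_alignment or utils.extract_soft_alignment, chosen from the print_alignment option in __init__.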
+ for i in range(bsz * beam_size): + alignment = self.extract_alignment( + attn[i], src_tokens[i], tgt_tokens[i], self.pad, self.eos + ) + finalized[i // beam_size][i % beam_size]["alignment"] = alignment + return finalized + + def _prepare_batch_for_alignment(self, sample, hypothesis): + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + src_tokens = ( + src_tokens[:, None, :] + .expand(-1, self.beam_size, -1) + .contiguous() + .view(bsz * self.beam_size, -1) + ) + src_lengths = sample["net_input"]["src_lengths"] + src_lengths = ( + src_lengths[:, None] + .expand(-1, self.beam_size) + .contiguous() + .view(bsz * self.beam_size) + ) + prev_output_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=True, + ) + tgt_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=False, + ) + return src_tokens, src_lengths, prev_output_tokens, tgt_tokens + + +class EnsembleModelWithAlignment(EnsembleModel): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__(models) + + def forward_align(self, src_tokens, src_lengths, prev_output_tokens): + avg_attn = None + for model in self.models: + decoder_out = model(src_tokens, src_lengths, prev_output_tokens) + attn = decoder_out[1]["attn"][0] + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + if len(self.models) > 1: + avg_attn.div_(len(self.models)) + return avg_attn diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/tasks/vathubert_pretraining.py b/SpeechT5/VATLM/vat_hubert/vathubert/tasks/vathubert_pretraining.py new file mode 100644 index 0000000000000000000000000000000000000000..08b81d1cb51c5f68328eb334c2ff67e7ea7e8c79 --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/tasks/vathubert_pretraining.py @@ -0,0 +1,863 @@ +# ---------------------------------------------------------------------------- +# VatLM: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning +# Github source: https://github.com/microsoft/SpeechT5/tree/main/VATLM +# Code based on fairseq: https://github.com/facebookresearch/fairseq and av_hubert: https://github.com/facebookresearch/av_hubert +# +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# ---------------------------------------------------------------------------- + +import logging +import os, glob +import sys +from typing import Dict, List, Optional, Tuple + +import numpy as np + +from dataclasses import dataclass, field +from fairseq import metrics, search +from fairseq.data import Dictionary, encoders +from fairseq.dataclass.configs import FairseqDataclass +from fairseq.tasks import register_task +from fairseq.tasks.fairseq_task import FairseqTask +from omegaconf import MISSING, II +import numpy as np +from argparse import Namespace + +DBG=True if len(sys.argv) == 1 else False + +if DBG: + from vathubert.data.vathubert_dataset import VATHubertDataset + from vathubert.sequence_generator import SequenceGenerator +else: + + from vathubert.data.vathubert_dataset import VATHubertDataset + from vathubert.sequence_generator import SequenceGenerator + from vathubert.data.audiohubert_dataset import AudioHubertDataset + from vathubert.data.texthubert_dataset import TextHubertDataset + from 
vathubert.data.onlyaudiohubert_dataset import OnlyAudioHubertDataset + +from fairseq.data.audio.multi_corpus_dataset_audio import MultiCorpusDataset +from collections import OrderedDict +from fairseq.data import FairseqDataset +from fairseq.data import data_utils +from fairseq.data import iterators + + +logger = logging.getLogger(__name__) + + +class LabelEncoder(object): + def __init__(self, dictionary: Dictionary) -> None: + self.dictionary = dictionary + + def __call__(self, label: str) -> List[str]: + return self.dictionary.encode_line( + label, append_eos=False, add_if_not_exist=False, + ) + +class LabelEncoderS2SToken(object): + def __init__(self, dictionary: Dictionary, bpe_tokenizer) -> None: + self.bpe_tokenizer = bpe_tokenizer + self.dictionary = dictionary + + def __call__(self, label: str) -> List[str]: + label = self.bpe_tokenizer.encode(label.lower()) + return self.dictionary.encode_line( + label, append_eos=True, add_if_not_exist=False, + ).long() + + def decode(self, tok, symbols_ignore=None): + tok = self.dictionary.string(tok, extra_symbols_to_ignore=symbols_ignore) + if self.bpe_tokenizer: + tok = self.bpe_tokenizer.decode(tok) + return tok + +@dataclass +class VATHubertPretrainingConfig(FairseqDataclass): + data: str = field( + default=MISSING, metadata={"help": "path to data directory"} + ) + labels: List[str] = field( + default_factory=lambda: ["ltr"], + metadata={ + "help": ( + "extension of the label files to load, frame-level labels for" + " pre-training, and sequence-level label for fine-tuning" + ) + }, + ) + label_dir: Optional[str] = field( + default=None, + metadata={ + "help": "if set, looks for labels in this directory instead", + }, + ) + label_rate: int = field( + default=-1, + metadata={"help": "label frame rate. -1 for sequence label"}, + ) + + sample_rate: int = field( + default=16_000, + metadata={ + "help": "target sample rate. audio files will be up/down " + "sampled to this rate" + }, + ) + normalize: bool = field( + default=False, + metadata={ + "help": "if set, normalizes input to have 0 mean and unit variance" + }, + ) + enable_padding: bool = field( + default=False, + metadata={"help": "pad shorter samples instead of cropping"}, + ) + max_sample_size: Optional[int] = field( + default=None, + metadata={"help": "max sample size to keep in training"}, + ) + min_sample_size: Optional[int] = field( + default=None, + metadata={"help": "min sample size to keep in training"}, + ) + max_trim_sample_size: Optional[int] = field( + default=II("task.max_sample_size"), + metadata={"help": "max sample size to trim to for batching"}, + ) + single_target: Optional[bool] = field( + default=False, + metadata={ + "help": "if set, AddTargetDatasets outputs same keys " + "as AddTargetDataset" + }, + ) + random_crop: Optional[bool] = field( + default=True, + metadata={"help": "always crop from the beginning if false"}, + ) + pad_audio: Optional[bool] = field( + default=False, + metadata={"help": "pad audio to the longest one in the batch if true"}, + ) + pdb: Optional[bool] = field( + default=False, + metadata={"help": "pdb"}, + ) + stack_order_audio: int = field( + default=1, + metadata={"help": "concatenate n consecutive audio frames for one step"}, + ) + skip_verify: Optional[bool] = field( + default=False, + metadata={"help": "skip verifying label-audio alignment"}, + ) + + text_sampling_alpha: float = field( + default=0.2, + metadata={ + "help": "Hyper-parameter alpha = 1/T for temperature-based text resampling." 
+ "(alpha = 1 for no resampling)" + }, + ) + split_modality_batch: bool = field( + default=False, + metadata={"help": "whether create all samples of different modalities in a batch"}, + ) + image_aug: bool = field(default=False, metadata={'help': 'image data augmentation'}) + image_crop_size: int = field( + default=88, metadata={"help": "image ROI size"}) + image_mean: float = field( + default=0.421, metadata={"help": "image mean"}) + image_std: float = field( + default=0.165, metadata={"help": "image std"}) + modalities: Optional[List[str]] = field(default_factory=lambda: ["audio", "video"], metadata={'help': 'modalities to load'}) + is_s2s: bool=field(default=False, metadata={'help': 'seq2seq fine-tuning only'}) + tokenizer_bpe_name: Optional[str] = field(default=None, metadata={'help': 'tokenizer model name'}) + tokenizer_bpe_model: Optional[str] = field(default=None, metadata={'help': 'tokenizer model path'}) + noise_wav: Optional[str] = field(default=None, metadata={'help': 'manifest of noise wav files (one wav file path per line)'}) + noise_prob: float = field(default=0, metadata={'help': 'noise probability'}) + noise_snr: Optional[str] = field(default='0', metadata={'help': 'noise SNR in audio'}) + noise_num: int = field(default=1, metadata={'help': 'number of noise wav files to mix'}) + fine_tuning: bool = field(default=False, metadata={"help": "set to true if fine-tuning AV-Hubert"}) + use_supervised_data: bool = field(default=True, metadata={"help": "use paired speech-text data"}) + sup_data_path: Optional[str] = field( + default=None, + metadata={ + "help": "supervised dataset path", + }, + ) + sup_manifest: Optional[str] = field( + default=None, + metadata={ + "help": "supervised dataset manifest", + }, + ) + sample_distributions: Optional[str] = field(default='0', metadata={'help': 'sample distribution'}) + ########### + use_extra_textdata: bool = field(default=True, metadata={"help": "use extra text data"}) + onlytext_manifest: Optional[str] = field( + default=None, + metadata={ + "help": "text-only dataset manifest", + }, + ) + use_extra_audiodata: bool = field(default=True, metadata={"help": "use extra audio data"}) + onlyaudio_manifest: Optional[str] = field( + default=None, + metadata={ + "help": "audio-only dataset manifest", + }, + ) + +@register_task("vat_hubert_pretraining", dataclass=VATHubertPretrainingConfig) +class VATHubertPretrainingTask(FairseqTask): + + cfg: VATHubertPretrainingConfig + + def __init__( + self, + cfg: VATHubertPretrainingConfig, + ) -> None: + super().__init__(cfg) + + logger.info(f"current directory is {os.getcwd()}") + logger.info(f"VATHubertPretrainingTask Config {cfg}") + + self.state.add_factory("phone_dictionary", self.load_phone_dictionaries) + # self.state.add_factory("s2s_tokenizer", self.load_tokenizer) + + self.fine_tuning = cfg.fine_tuning + if cfg.fine_tuning: + self.state.add_factory("target_dictionary", self.load_dictionaries) + if cfg.is_s2s: + self.state.add_factory("s2s_tokenizer", self.load_tokenizer) + else: + self.state.add_factory("dictionaries", self.load_dictionaries) + + + + self.blank_symbol = "" + + @property + def source_dictionary(self) -> Optional[Dictionary]: + return None # self._source_dictionary + + @property + def target_dictionary(self) -> Optional[Dictionary]: + return self.state.target_dictionary # self._target_dictionary + + @property + def dictionaries(self) -> List[Dictionary]: + return self.state.dictionaries + + @property + def phone_dictionary(self) -> List[Dictionary]: + return 
self.state.phone_dictionary
+
+
+    def load_dictionaries(self):
+        label_dir = self.cfg.data if self.cfg.label_dir is None else self.cfg.label_dir
+        dictionaries = [
+            Dictionary.load(f"{label_dir}/dict.{label}.txt")
+            for label in self.cfg.labels
+        ]
+        return dictionaries[0] if self.cfg.fine_tuning else dictionaries
+
+    def load_tokenizer(self):
+        logger.info(f"Using tokenizer")
+        bpe_args = Namespace(**{'bpe': self.cfg.tokenizer_bpe_name, f"{self.cfg.tokenizer_bpe_name}_model": self.cfg.tokenizer_bpe_model})
+        bpe_tokenizer = encoders.build_bpe(bpe_args)
+        return bpe_tokenizer
+
+    def load_phone_dictionaries(self):
+        dictionaries = [
+            Dictionary.load(f"{self.cfg.sup_manifest}/dict.phn.txt")
+        ]
+        return dictionaries
+
+
+    @property
+    def s2s_tokenizer(self):
+        return self.state.s2s_tokenizer
+
+    @classmethod
+    def setup_task(
+        cls, cfg: VATHubertPretrainingConfig, **kwargs
+    ) -> "VATHubertPretrainingTask":
+        if cfg.pdb:
+            import pdb
+            pdb.set_trace()
+        return cls(cfg)
+
+    def get_label_dir(self) -> str:
+        if self.cfg.label_dir is None:
+            return self.cfg.data
+        return self.cfg.label_dir
+
+    def load_dataset(self, split: str, epoch=1, **kwargs) -> None:
+        manifest = f"{self.cfg.data}/{split}.tsv"
+        dictionaries = [self.target_dictionary] if self.fine_tuning else self.dictionaries
+        pad_list = [dictionary.pad() for dictionary in dictionaries]  # [1], blank should be [0]
+        eos_list = [dictionary.eos() for dictionary in dictionaries]  # [2]
+        if not self.cfg.is_s2s:
+            procs = [LabelEncoder(dictionary) for dictionary in dictionaries]
+        else:
+            logger.info(f"Using tokenizer")
+            bpe_tokenizer = self.s2s_tokenizer
+            procs = [LabelEncoderS2SToken(dictionary, bpe_tokenizer) for dictionary in dictionaries]
+        paths = [
+            f"{self.get_label_dir()}/{split}.{l}" for l in self.cfg.labels
+        ]
+        image_aug = self.cfg.image_aug if split == 'train' else False
+        noise_fn, noise_snr = f"{self.cfg.noise_wav}/{split}.tsv" if self.cfg.noise_wav is not None else None, eval(self.cfg.noise_snr)
+        noise_num = self.cfg.noise_num #
+
+        all_datasets = []
+        avdatasets = VATHubertDataset(
+            manifest,
+            sample_rate=self.cfg.sample_rate,
+            label_paths=paths,
+            label_rates=self.cfg.label_rate,
+            pad_list=pad_list,
+            eos_list=eos_list,
+            label_processors=procs,
+            max_keep_sample_size=self.cfg.max_sample_size,
+            min_keep_sample_size=self.cfg.min_sample_size,
+            max_sample_size=self.cfg.max_trim_sample_size,
+            pad_audio=self.cfg.pad_audio,
+            normalize=self.cfg.normalize,
+            store_labels=False,
+            random_crop=self.cfg.random_crop,
+            single_target=self.cfg.single_target,
+            stack_order_audio=self.cfg.stack_order_audio,
+            skip_verify=self.cfg.skip_verify,
+            image_mean=self.cfg.image_mean,
+            image_std=self.cfg.image_std,
+            image_crop_size=self.cfg.image_crop_size,
+            image_aug=image_aug,
+            modalities=self.cfg.modalities,
+            is_s2s=self.cfg.is_s2s,
+            noise_fn=noise_fn,
+            noise_prob=self.cfg.noise_prob,
+            noise_snr=noise_snr,
+            noise_num=noise_num
+        )
+        all_datasets.append(avdatasets)
+
+        # import pdb
+        # pdb.set_trace()
+
+        if self.cfg.use_supervised_data:
+            sup_manifest = f"{self.cfg.sup_manifest}/{split}.tsv"
+
+            sup_paths = [
+                f"{self.cfg.sup_data_path}/{split}.{l}" for l in self.cfg.labels
+            ]
+
+            phone_dictionaries = self.phone_dictionary
+            phone_procs = [LabelEncoder(dictionary) for dictionary in phone_dictionaries]
+
+            atdatasets = AudioHubertDataset(
+                sup_manifest,
+                sample_rate=self.cfg.sample_rate,
+                label_paths=sup_paths,
+                label_rates=self.cfg.label_rate,
+                pad_list=pad_list,
+                eos_list=eos_list,
+                label_processors=procs,
+
phone_sequence_processors=phone_procs, + max_keep_sample_size=self.cfg.max_sample_size, + min_keep_sample_size=self.cfg.min_sample_size, + max_sample_size=self.cfg.max_trim_sample_size, + pad_audio=self.cfg.pad_audio, + normalize=self.cfg.normalize, + store_labels=True, + single_target=self.cfg.single_target, + stack_order_audio=self.cfg.stack_order_audio, + skip_verify=self.cfg.skip_verify, + is_s2s=self.cfg.is_s2s, + ) + all_datasets.append(atdatasets) + + if self.cfg.use_extra_textdata: + extra_text_manifest = f"{self.cfg.onlytext_manifest}/{split}.tsv" + extra_text_paths = [ + f"{self.cfg.onlytext_manifest}/{split}.{l}" for l in self.cfg.labels + ] + + # import pdb + # pdb.set_trace() + + textdatasets = TextHubertDataset( + extra_text_manifest, + sample_rate=self.cfg.sample_rate, + label_paths=extra_text_paths, + label_rates=self.cfg.label_rate, + pad_list=pad_list, + eos_list=eos_list, + label_processors=procs, + phone_sequence_processors=phone_procs, + max_keep_sample_size=self.cfg.max_sample_size, + min_keep_sample_size=self.cfg.min_sample_size, + max_sample_size=self.cfg.max_trim_sample_size, + pad_audio=self.cfg.pad_audio, + normalize=self.cfg.normalize, + store_labels=True, + single_target=self.cfg.single_target, + stack_order_audio=self.cfg.stack_order_audio, + skip_verify=self.cfg.skip_verify, + is_s2s=self.cfg.is_s2s, + ) + all_datasets.append(textdatasets) + + if self.cfg.use_extra_audiodata: + extra_audio_manifest = f"{self.cfg.onlyaudio_manifest}/{split}.tsv" + extra_audio_paths = [ + f"{self.cfg.onlyaudio_manifest}/{split}.{l}" for l in self.cfg.labels + ] + + audiodatasets = OnlyAudioHubertDataset( + extra_audio_manifest, + sample_rate=self.cfg.sample_rate, + label_paths=extra_audio_paths, + label_rates=self.cfg.label_rate, + pad_list=pad_list, + eos_list=eos_list, + label_processors=procs, + max_keep_sample_size=self.cfg.max_sample_size, + min_keep_sample_size=self.cfg.min_sample_size, + max_sample_size=self.cfg.max_trim_sample_size, + pad_audio=self.cfg.pad_audio, + normalize=self.cfg.normalize, + store_labels=False, + single_target=self.cfg.single_target, + stack_order_audio=self.cfg.stack_order_audio, + skip_verify=self.cfg.skip_verify, + is_s2s=self.cfg.is_s2s, + ) + all_datasets.append(audiodatasets) + + + + + dataset_list = all_datasets + dataset_dict = OrderedDict((name, d) for name, d in zip(["videoaudio", "audiotext", "onlytext", "onlyaudio"], dataset_list) if d is not None) + if not self.fine_tuning: + max_positions_dict = { + "videoaudio": 1024, + "audiotext": 1024, + "onlytext": 1024, + "onlyaudio": 1024, + } + max_positions_dict = OrderedDict((name, max_positions_dict[name]) for name in dataset_dict.keys()) + + max_tokens_ratios_dict = { + "videoaudio": 1.0, + "audiotext": 1.0, + "onlytext": 1.0, + "onlyaudio": 1.0, + } + max_tokens_ratios = [max_tokens_ratios_dict[name] for name in dataset_dict.keys()] + dataset_lens = np.array([len(dataset) for dataset in dataset_dict.values()]) + dataset_avg_sample_lens = np.array([ + sum([dataset.num_tokens(i) for i in np.random.randint(low=0, high=len(dataset), size=10000)]) / 10000.0 + for dataset in dataset_dict.values() + ]) + distributions = [eval(self.cfg.sample_distributions)[0], eval(self.cfg.sample_distributions)[1], eval(self.cfg.sample_distributions)[2], eval(self.cfg.sample_distributions)[3]] + + + + logging.info(f"Number samples of datasets is {dataset_lens}") + logging.info(f"Avg sample length of datasets is {dataset_avg_sample_lens}") + logging.info(f"Sampling distributions is {distributions}") + 
logging.info(f"Maxtokens ratio is {max_tokens_ratios}") + logging.info(f"split_modality_batch is {self.cfg.split_modality_batch}") + + + self.datasets[split] = MultiCorpusDataset( + dataset_dict, + max_positions=max_positions_dict, + distribution=distributions, + max_tokens_ratio=max_tokens_ratios, + seed=1234, + sort_indices=True, + ) + + if self.fine_tuning: + self.datasets[split] = VATHubertDataset( + manifest, + sample_rate=self.cfg.sample_rate, + label_paths=paths, + label_rates=self.cfg.label_rate, + pad_list=pad_list, + eos_list=eos_list, + label_processors=procs, + max_keep_sample_size=self.cfg.max_sample_size, + min_keep_sample_size=self.cfg.min_sample_size, + max_sample_size=self.cfg.max_trim_sample_size, + pad_audio=self.cfg.pad_audio, + normalize=self.cfg.normalize, + store_labels=False, + random_crop=self.cfg.random_crop, + single_target=self.cfg.single_target, + stack_order_audio=self.cfg.stack_order_audio, + skip_verify=self.cfg.skip_verify, + image_mean=self.cfg.image_mean, + image_std=self.cfg.image_std, + image_crop_size=self.cfg.image_crop_size, + image_aug=image_aug, + modalities=self.cfg.modalities, + is_s2s=self.cfg.is_s2s, + noise_fn=noise_fn, + noise_prob=self.cfg.noise_prob, + noise_snr=noise_snr, + noise_num=noise_num + ) + + # @classmethod + # def _get_size_ratios(cls, ids: List[str], sizes: List[int], alpha: float = 1.0): + # """Size ratios for temperature-based sampling + # (https://arxiv.org/abs/1907.05019)""" + # _sizes = np.array(sizes) + # prob = _sizes / _sizes.sum() + # smoothed_prob = prob ** alpha + # smoothed_prob = smoothed_prob / smoothed_prob.sum() + # size_ratio = (smoothed_prob * _sizes.sum()) / _sizes + + # o_str = str({_i: f"{prob[i]:.3f}" for i, _i in enumerate(ids)}) + # logger.info(f"original sampling probability: {o_str}") + # p_str = str({_i: f"{smoothed_prob[i]:.3f}" for i, _i in enumerate(ids)}) + # logger.info(f"balanced sampling probability: {p_str}") + # sr_str = str({_id: f"{size_ratio[i]:.3f}" for i, _id in enumerate(ids)}) + # logger.info(f"balanced sampling size ratio: {sr_str}") + # return size_ratio.tolist() + + + # def resample_multi_modality_dataset(self, speech_dataset, paired_datasets, epoch=1, train=True): + + # if len(paired_datasets) > 1 and self.cfg.text_sampling_alpha != 1.0: + # size_ratios = self._get_size_ratios( + # paired_splits, [len(s) for s in paired_datasets], alpha=self.cfg.text_sampling_alpha + # ) + # paired_datasets = [ + # ResamplingDataset( + # d, size_ratio=r, seed=0, epoch=epoch, replace=(r >= 1.0) + # ) for d, r in zip(paired_datasets, size_ratios) + # ] + + # dataset_list = [speech_dataset] + # for datasets in [paired_datasets]: + # if len(datasets) > 1: + # dataset_list.append(ConcatDataset(datasets)) + # elif len(datasets) == 1: + # dataset_list.append(datasets[0]) + # else: + # dataset_list.append(None) + + # ### match speech/text datasets according to modality + # dataset_dict = OrderedDict((name, d) for name, d in zip(["speech", "speech_sup", "text_mono", "text_paired"], dataset_list) if d is not None) + # max_positions_dict = { + # "speech": None, + # "speech_sup": None, + # "text_mono": (1024, 1024), + # "text_paired": (1024, 1024), + # } + # max_positions_dict = OrderedDict((name, max_positions_dict[name]) for name in dataset_dict.keys()) + # max_tokens_ratios_dict = { + # "speech": 1.0, + # "speech_sup": 1.0, + # "text_mono": 1.0 / 320 / 1.0, + # "text_paired": 1.0 / 320 / 1.0, + # } + # max_tokens_ratios = [max_tokens_ratios_dict[name] for name in dataset_dict.keys()] + # dataset_lens = 
np.array([len(dataset) for dataset in dataset_dict.values()]) + # dataset_avg_sample_lens = np.array([ + # sum([dataset.num_tokens(i) for i in np.random.randint(low=0, high=len(dataset), size=10000)]) / 10000.0 + # for dataset in dataset_dict.values() + # ]) + + # if not "speech" in dataset_dict: + # distributions = [l / sum(dataset_lens) for l in dataset_lens] + # else: + # ## we just keep the batches of speech and non-speech the same, expand_coef is to ensure speech batches is less than others + # first_ratio = dataset_lens[0] / sum(dataset_lens) + # expand_coef = 1.8 if sup_dataset is None else 1.1 * sum(dataset_lens[0:2]) / dataset_lens[0] + # distributions = [expand_coef * max_tokens_ratios[i] * dataset_avg_sample_lens[0] / l for (i, l) in enumerate(dataset_avg_sample_lens)] + # distributions[0] = 1.0 + # if sup_dataset is not None: + # distributions[1] = dataset_lens[1] / dataset_lens[0] + # distributions = [first_ratio * d for d in distributions] + + # logging.info(f"Number samples of datasets is {dataset_lens}") + # logging.info(f"Avg sample length of datasets is {dataset_avg_sample_lens}") + # logging.info(f"Sampling distributions is {distributions}") + # logging.info(f"Maxtokens ratio is {max_tokens_ratios}") + # return dataset_dict, max_positions_dict, distributions, max_tokens_ratios + + + def max_positions(self) -> Tuple[int, int]: + return (sys.maxsize, sys.maxsize) + + def filter_indices_by_size( + self, indices: np.array, *args, **kwargs + ) -> np.array: + return indices + + def get_batch_iterator( + self, + dataset, + max_tokens=None, + max_sentences=None, + max_positions=None, + ignore_invalid_inputs=False, + required_batch_size_multiple=1, + seed=1, + num_shards=1, + shard_id=0, + num_workers=0, + epoch=1, + data_buffer_size=0, + disable_iterator_cache=False, + skip_remainder_batch=False, + grouped_shuffling=False, + update_epoch_batch_itr=False, + ): + """ + Get an iterator that yields batches of data from the given dataset. + Args: + dataset (~fairseq.data.FairseqDataset): dataset to batch + max_tokens (int, optional): max number of tokens in each batch + (default: None). + max_sentences (int, optional): max number of sentences in each + batch (default: None). + max_positions (optional): max sentence length supported by the + model (default: None). + ignore_invalid_inputs (bool, optional): don't raise Exception for + sentences that are too long (default: False). + required_batch_size_multiple (int, optional): require batch size to + be a multiple of N (default: 1). + seed (int, optional): seed for random number generator for + reproducibility (default: 1). + num_shards (int, optional): shard the data iterator into N + shards (default: 1). + shard_id (int, optional): which shard of the data iterator to + return (default: 0). + num_workers (int, optional): how many subprocesses to use for data + loading. 0 means the data will be loaded in the main process + (default: 0). + epoch (int, optional): the epoch to start the iterator from + (default: 1). + data_buffer_size (int, optional): number of batches to + preload (default: 0). + disable_iterator_cache (bool, optional): don't cache the + EpochBatchIterator (ignores `FairseqTask::can_reuse_epoch_itr`) + (default: False). + skip_remainder_batch (bool, optional): if set, discard the last + batch in each training epoch, as the last batch is often smaller than + local_batch_size * distributed_word_size (default: ``True``). 
+ grouped_shuffling (bool, optional): group batches with each groups + containing num_shards batches and shuffle groups. Reduces difference + between sequence lengths among workers for batches sorted by length. + update_epoch_batch_itr (bool optional): if true then donot use the cached + batch iterator for the epoch + + Returns: + ~fairseq.iterators.EpochBatchIterator: a batched iterator over the + given dataset split + """ + + if self.fine_tuning or not isinstance(dataset, MultiCorpusDataset): + return super().get_batch_iterator( + dataset, + max_tokens=max_tokens, + max_sentences=max_sentences, + max_positions=max_positions, + ignore_invalid_inputs=ignore_invalid_inputs, + required_batch_size_multiple=required_batch_size_multiple, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + data_buffer_size=data_buffer_size, + disable_iterator_cache=disable_iterator_cache, + ) + logging.info(f"num_workers is {num_workers}") + can_reuse_epoch_itr = ( + not disable_iterator_cache + and not update_epoch_batch_itr + and self.can_reuse_epoch_itr(dataset) + ) + if can_reuse_epoch_itr and dataset in self.dataset_to_epoch_iter: + logger.debug("reusing EpochBatchIterator for epoch {}".format(epoch)) + return self.dataset_to_epoch_iter[dataset] + + assert isinstance(dataset, FairseqDataset) + + # initialize the dataset with the correct starting epoch + dataset.set_epoch(epoch) + + # get indices ordered by example size + with data_utils.numpy_seed(seed): + indices = dataset.ordered_indices() + + # filter examples that are too large + if max_positions is not None: + indices = self.filter_indices_by_size( + indices, dataset, max_positions, ignore_invalid_inputs + ) + + # create mini-batches with given size constraints + batch_sampler = dataset.get_batch_sampler( + indices, + num_shards, + seed, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + split_modality_batch=self.cfg.split_modality_batch, + ) + + # return a reusable, sharded iterator + + epoch_iter = iterators.EpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_sampler=batch_sampler, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + buffer_size=data_buffer_size, + disable_shuffling=True, + ) + + if can_reuse_epoch_itr: + self.dataset_to_epoch_iter[dataset] = epoch_iter + + return epoch_iter + + + def build_generator( + self, models, args, seq_gen_cls=None, extra_gen_cls_kwargs=None, prefix_allowed_tokens_fn=None, + ): + """ + Build a :class:`~fairseq.SequenceGenerator` instance for this + task. + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models + args (fairseq.dataclass.configs.GenerationConfig): + configuration object (dataclass) for generation + extra_gen_cls_kwargs (Dict[str, Any]): extra options to pass + through to SequenceGenerator + prefix_allowed_tokens_fn (Callable[[int, torch.Tensor], List[int]]): + If provided, this function constrains the beam search to + allowed tokens only at each step. The provided function + should take 2 arguments: the batch ID (`batch_id: int`) + and a unidimensional tensor of token ids (`inputs_ids: + torch.Tensor`). It has to return a `List[int]` with the + allowed tokens for the next generation step conditioned + on the previously generated tokens (`inputs_ids`) and + the batch ID (`batch_id`). 
This argument is useful for + constrained generation conditioned on the prefix, as + described in "Autoregressive Entity Retrieval" + (https://arxiv.org/abs/2010.00904) and + https://github.com/facebookresearch/GENRE. + """ + if getattr(args, "score_reference", False): + from fairseq.sequence_scorer import SequenceScorer + + return SequenceScorer( + self.target_dictionary, + compute_alignment=getattr(args, "print_alignment", False), + ) + + # Choose search strategy. Defaults to Beam Search. + sampling = getattr(args, "sampling", False) + sampling_topk = getattr(args, "sampling_topk", -1) + sampling_topp = getattr(args, "sampling_topp", -1.0) + diverse_beam_groups = getattr(args, "diverse_beam_groups", -1) + diverse_beam_strength = getattr(args, "diverse_beam_strength", 0.5) + match_source_len = getattr(args, "match_source_len", False) + diversity_rate = getattr(args, "diversity_rate", -1) + constrained = getattr(args, "constraints", False) + if prefix_allowed_tokens_fn is None: + prefix_allowed_tokens_fn = getattr(args, "prefix_allowed_tokens_fn", None) + if ( + sum( + int(cond) + for cond in [ + sampling, + diverse_beam_groups > 0, + match_source_len, + diversity_rate > 0, + ] + ) + > 1 + ): + raise ValueError("Provided Search parameters are mutually exclusive.") + assert sampling_topk < 0 or sampling, "--sampling-topk requires --sampling" + assert sampling_topp < 0 or sampling, "--sampling-topp requires --sampling" + + if sampling: + search_strategy = search.Sampling( + self.target_dictionary, sampling_topk, sampling_topp + ) + elif diverse_beam_groups > 0: + search_strategy = search.DiverseBeamSearch( + self.target_dictionary, diverse_beam_groups, diverse_beam_strength + ) + elif match_source_len: + # this is useful for tagging applications where the output + # length should match the input length, so we hardcode the + # length constraints for simplicity + search_strategy = search.LengthConstrainedBeamSearch( + self.target_dictionary, + min_len_a=1, + min_len_b=0, + max_len_a=1, + max_len_b=0, + ) + elif diversity_rate > -1: + search_strategy = search.DiverseSiblingsSearch( + self.target_dictionary, diversity_rate + ) + elif constrained: + search_strategy = search.LexicallyConstrainedBeamSearch( + self.target_dictionary, args.constraints + ) + elif prefix_allowed_tokens_fn: + search_strategy = search.PrefixConstrainedBeamSearch( + self.target_dictionary, prefix_allowed_tokens_fn + ) + else: + search_strategy = search.BeamSearch(self.target_dictionary) + + extra_gen_cls_kwargs = extra_gen_cls_kwargs or {} + if seq_gen_cls is None: + if getattr(args, "print_alignment", False): + seq_gen_cls = SequenceGeneratorWithAlignment + extra_gen_cls_kwargs["print_alignment"] = args.print_alignment + else: + seq_gen_cls = SequenceGenerator + + return seq_gen_cls( + models, + self.target_dictionary, + beam_size=getattr(args, "beam", 5), + max_len_a=getattr(args, "max_len_a", 0), + max_len_b=getattr(args, "max_len_b", 200), + min_len=getattr(args, "min_len", 1), + normalize_scores=(not getattr(args, "unnormalized", False)), + len_penalty=getattr(args, "lenpen", 1), + unk_penalty=getattr(args, "unkpen", 0), + temperature=getattr(args, "temperature", 1.0), + match_source_len=getattr(args, "match_source_len", False), + no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0), + search_strategy=search_strategy, + **extra_gen_cls_kwargs, + ) diff --git a/SpeechT5/VATLM/vat_hubert/vathubert/utils.py b/SpeechT5/VATLM/vat_hubert/vathubert/utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..60d57fa006adbb9839e1c3501b3442917bb0df3e --- /dev/null +++ b/SpeechT5/VATLM/vat_hubert/vathubert/utils.py @@ -0,0 +1,298 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +import cv2 +import torch +import random +import numpy as np +from typing import Dict, List, Optional, Tuple + +def load_video(path): + for i in range(3): + try: + cap = cv2.VideoCapture(path) + frames = [] + while True: + ret, frame = cap.read() + if ret: + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + frames.append(frame) + else: + break + frames = np.stack(frames) + return frames + except Exception: + print(f"failed loading {path} ({i} / 3)") + if i == 2: + raise ValueError(f"Unable to load {path}") + + +class Compose(object): + """Compose several preprocess together. + Args: + preprocess (list of ``Preprocess`` objects): list of preprocess to compose. + """ + + def __init__(self, preprocess): + self.preprocess = preprocess + + def __call__(self, sample): + for t in self.preprocess: + sample = t(sample) + return sample + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.preprocess: + format_string += '\n' + format_string += ' {0}'.format(t) + format_string += '\n)' + return format_string + + +class Normalize(object): + """Normalize a ndarray image with mean and standard deviation. + """ + + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, frames): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + Returns: + Tensor: Normalized Tensor image. + """ + frames = (frames - self.mean) / self.std + return frames + + def __repr__(self): + return self.__class__.__name__+'(mean={0}, std={1})'.format(self.mean, self.std) + +class CenterCrop(object): + """Crop the given image at the center + """ + def __init__(self, size): + self.size = size + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be cropped. + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + th, tw = self.size + delta_w = int(round((w - tw))/2.) + delta_h = int(round((h - th))/2.) + frames = frames[:, delta_h:delta_h+th, delta_w:delta_w+tw] + return frames + + +class RandomCrop(object): + """Crop the given image at the center + """ + + def __init__(self, size): + self.size = size + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be cropped. + Returns: + numpy.ndarray: Cropped image. + """ + t, h, w = frames.shape + th, tw = self.size + delta_w = random.randint(0, w-tw) + delta_h = random.randint(0, h-th) + frames = frames[:, delta_h:delta_h+th, delta_w:delta_w+tw] + return frames + + def __repr__(self): + return self.__class__.__name__ + '(size={0})'.format(self.size) + +class HorizontalFlip(object): + """Flip image horizontally. + """ + + def __init__(self, flip_ratio): + self.flip_ratio = flip_ratio + + def __call__(self, frames): + """ + Args: + img (numpy.ndarray): Images to be flipped with a probability flip_ratio + Returns: + numpy.ndarray: Cropped image. 
+ """ + t, h, w = frames.shape + if random.random() < self.flip_ratio: + for index in range(t): + frames[index] = cv2.flip(frames[index], 1) + return frames + +def compute_mask_indices( + shape: Tuple[int, int], + padding_mask: Optional[torch.Tensor], + mask_prob: float, + mask_length: int, + mask_type: str = "static", + mask_other: float = 0.0, + min_masks: int = 0, + no_overlap: bool = False, + min_space: int = 0, +) -> np.ndarray: + """ + Computes random mask spans for a given shape + Args: + shape: the the shape for which to compute masks. + should be of size 2 where first element is batch size and 2nd is timesteps + padding_mask: optional padding mask of the same size as shape, which will prevent masking padded elements + mask_prob: probability for each token to be chosen as start of the span to be masked. this will be multiplied by + number of timesteps divided by length of mask span to mask approximately this percentage of all elements. + however due to overlaps, the actual number will be smaller (unless no_overlap is True) + mask_type: how to compute mask lengths + static = fixed size + uniform = sample from uniform distribution [mask_other, mask_length*2] + normal = sample from normal distribution with mean mask_length and stdev mask_other. mask is min 1 element + poisson = sample from possion distribution with lambda = mask length + min_masks: minimum number of masked spans + no_overlap: if false, will switch to an alternative recursive algorithm that prevents spans from overlapping + min_space: only used if no_overlap is True, this is how many elements to keep unmasked between spans + """ + + bsz, all_sz = shape + mask = np.full((bsz, all_sz), False) + + all_num_mask = int( + # add a random number for probabilistic rounding + mask_prob * all_sz / float(mask_length) + + np.random.rand() + ) + + all_num_mask = max(min_masks, all_num_mask) + + mask_idcs = [] + for i in range(bsz): + if padding_mask is not None: + sz = all_sz - padding_mask[i].long().sum().item() + num_mask = int( + # add a random number for probabilistic rounding + mask_prob * sz / float(mask_length) + + np.random.rand() + ) + num_mask = max(min_masks, num_mask) + else: + sz = all_sz + num_mask = all_num_mask + + if mask_type == "static": + lengths = np.full(num_mask, mask_length) + elif mask_type == "uniform": + lengths = np.random.randint(mask_other, mask_length * 2 + 1, size=num_mask) + elif mask_type == "normal": + lengths = np.random.normal(mask_length, mask_other, size=num_mask) + lengths = [max(1, int(round(x))) for x in lengths] + elif mask_type == "poisson": + lengths = np.random.poisson(mask_length, size=num_mask) + lengths = [int(round(x)) for x in lengths] + else: + raise Exception("unknown mask selection " + mask_type) + + if sum(lengths) == 0: + lengths[0] = min(mask_length, sz - 1) + + if no_overlap: + mask_idc = [] + + def arrange(s, e, length, keep_length): + span_start = np.random.randint(s, e - length) + mask_idc.extend(span_start + i for i in range(length)) + + new_parts = [] + if span_start - s - min_space >= keep_length: + new_parts.append((s, span_start - min_space + 1)) + if e - span_start - keep_length - min_space > keep_length: + new_parts.append((span_start + length + min_space, e)) + return new_parts + + parts = [(0, sz)] + min_length = min(lengths) + for length in sorted(lengths, reverse=True): + lens = np.fromiter( + (e - s if e - s >= length + min_space else 0 for s, e in parts), + np.int, + ) + l_sum = np.sum(lens) + if l_sum == 0: + break + probs = lens / np.sum(lens) + c = 
np.random.choice(len(parts), p=probs) + s, e = parts.pop(c) + parts.extend(arrange(s, e, length, min_length)) + mask_idc = np.asarray(mask_idc) + else: + min_len = min(lengths) + if sz - min_len <= num_mask: + min_len = sz - num_mask - 1 + + mask_idc = np.random.choice(sz - min_len, num_mask, replace=False) + + mask_idc = np.asarray( + [ + mask_idc[j] + offset + for j in range(len(mask_idc)) + for offset in range(lengths[j]) + ] + ) + + mask_idcs.append(np.unique(mask_idc[mask_idc < sz])) + + min_len = min([len(m) for m in mask_idcs]) + batch_indexes, starts, ends = [], [], [] + for i, mask_idc in enumerate(mask_idcs): + if len(mask_idc) > min_len: + mask_idc = np.random.choice(mask_idc, min_len, replace=False) + mask[i, mask_idc] = True + vals, run_starts, run_lengths = find_runs(mask[i]) + start_indices, lengths = run_starts[vals == True], run_lengths[vals == True] + starts.append(start_indices) + ends.append(start_indices+lengths) + batch_indexes.append(np.zeros([len(start_indices)])+i) + return mask, np.concatenate(starts).astype(np.int64), np.concatenate(ends).astype(np.int64), np.concatenate(batch_indexes).astype(np.int64) + +def find_runs(x): + """Find runs of consecutive items in an array.""" + + # ensure array + x = np.asanyarray(x) + if x.ndim != 1: + raise ValueError('only 1D array supported') + n = x.shape[0] + + # handle empty array + if n == 0: + return np.array([]), np.array([]), np.array([]) + + else: + # find run starts + loc_run_start = np.empty(n, dtype=bool) + loc_run_start[0] = True + np.not_equal(x[:-1], x[1:], out=loc_run_start[1:]) + run_starts = np.nonzero(loc_run_start)[0] + + # find run values + run_values = x[loc_run_start] + + # find run lengths + run_lengths = np.diff(np.append(run_starts, n)) + + return run_values, run_starts, run_lengths diff --git a/SpeechT5/YiTrans/exp_scripts/finetune_ASR/finetune_hubert24_mbart24_en.sh b/SpeechT5/YiTrans/exp_scripts/finetune_ASR/finetune_hubert24_mbart24_en.sh new file mode 100644 index 0000000000000000000000000000000000000000..4f6a2d443fe739f3f31b7458c7e08da13db5a639 --- /dev/null +++ b/SpeechT5/YiTrans/exp_scripts/finetune_ASR/finetune_hubert24_mbart24_en.sh @@ -0,0 +1,67 @@ +world_size=$1 +update_freq=$2 +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=8 + +EXP_NAME=train_iwslt_asr_hubert24_mbart24_norel +SAVE_DIR=${HOME}/data/iwslt/asr_v3/${EXP_NAME} + +DATA_ROOT=${HOME}/dataset/iwslt_mustc +LABEL_DIR=${DATA_ROOT}/fine-tune_en_bpe250k +SP_PATH=${LABEL_DIR}/sentence.bpe.model +retain_dict=${LABEL_DIR}/index_en_onlyMUSTC +W2V_PATH=${HOME}/dataset/iwslt_mustc/pretrain_ed_model_cfg.pt + +TRAIN_SUBSET=train_asr_MUSTC +VALID_SUBSET=dev_asr_MUSTC + + +mbart_path="/mnt/default/v-junyiao/released_exsp/mbart50.pretrained/model.pt" +hubert_path="/mnt/default/v-junyiao/speechexp/fairseq_mlst/hubert_large_librivox_released/checkpoint_last.pt" + +CODE_ROOT=${HOME}/code/SpeechT5/YiTrans + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/yitrans_iwslt22/config/finetune_asr \ + --config-name large_mustc \ + common.user_dir=$CODE_ROOT/yitrans_iwslt22 \ + distributed_training.distributed_world_size=$world_size \ + optimization.update_freq=[$update_freq] \ + \ + dataset.max_tokens=400001 \ + dataset.num_workers=0 \ + optimization.max_update=120000 \ + \ + task._name="iwslt_joint_pretraining" \ + task.data=${DATA_ROOT} \ + task.label_dir=${LABEL_DIR} \ + +task.store_labels=True \ + task.hubert_tokenizer="sentencepiece" \ + task.sp_path=${SP_PATH} \ + task.max_keep_size=400000 \ + 
criterion.dec_weight=0.5 \ + \ + model._name="yitrans_asr" \ + model.w2v_path=${W2V_PATH} \ + +model.reuse_text_emb=true \ + +model.share_ctc_decoder_embed=true \ + +model.retain_dict_path=${retain_dict} \ + model.freeze_finetune_updates=15000 \ + \ + +model.no_pretrained_weights=true \ + +model.use_rel_pos_enc=false \ + +model.encoder_layers=24 \ + +model.add_text_encoder=true \ + +model.share_s2t_t2t_embeddings=false \ + +model.share_enc_dec_embeddings=false \ + +model.add_adaptor=false \ + +model.load_pretrained_w2v_from=$hubert_path \ + +model.load_pretrained_mbart_from=$mbart_path \ + \ + dataset.train_subset=${TRAIN_SUBSET} \ + dataset.valid_subset=${VALID_SUBSET} \ + checkpoint.save_dir=${SAVE_DIR} \ + common.tensorboard_logdir=${SAVE_DIR} \ + hydra.run.dir=${SAVE_DIR} \ + hydra.job.name=${EXP_NAME} + diff --git a/SpeechT5/YiTrans/exp_scripts/finetune_MT/finetune_mbart_en-de.sh b/SpeechT5/YiTrans/exp_scripts/finetune_MT/finetune_mbart_en-de.sh new file mode 100644 index 0000000000000000000000000000000000000000..8abce531d8e3b63af5bde5105d4bcd42404176e1 --- /dev/null +++ b/SpeechT5/YiTrans/exp_scripts/finetune_MT/finetune_mbart_en-de.sh @@ -0,0 +1,75 @@ +##################################### +# Hubert ED model # +##################################### +[ $# -gt 2 ] && echo "Usage: $0 [w2v_path] [mbart_path]" && exit 0 +world_size=$1 +update_freq=$2 +w2v_path=$3 +mbart_path=$4 + +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=2 +[ -z $w2v_path ] && w2v_path=${HOME}/dataset/iwslt_mustc/pretrain_ed_model_cfg.pt +[ -z $mbart_path ] && mbart_path="/mnt/default/v-junyiao/released_exsp/mbart50.pretrained/model.pt" +langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN,af_ZA,az_AZ,bn_IN,fa_IR,he_IL,hr_HR,id_ID,ka_GE,km_KH,mk_MK,ml_IN,mn_MN,mr_IN,pl_PL,ps_AF,pt_XX,sv_SE,sw_KE,ta_IN,te_IN,th_TH,tl_XX,uk_UA,ur_PK,xh_ZA,gl_ES,sl_SI + +DATA_DIR=/mnt/default/lozhou/speechdata/mt_data/en-de/com-filter-ende/bin-idx +exp_name=tune_mbart_com_filter_le-4 +SAVE_DIR="${HOME}/data/iwslt/mt_stage1_en-de/$exp_name" +[ -d $SAVE_DIR ] || mkdir -p $SAVE_DIR + +CODE_ROOT=${HOME}/code/SpeechT5/YiTrans + +python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \ + --config-dir $CODE_ROOT/yitrans_iwslt22/config/finetune_mt \ + --config-name mt_translation \ + common.user_dir=$CODE_ROOT/yitrans_iwslt22 \ + distributed_training.distributed_world_size=${world_size} \ + optimization.update_freq=[$update_freq] \ + \ + +task.data=$DATA_DIR \ + +task.source_lang="en_XX" +task.target_lang="de_DE" \ + +task.langs=\"$langs\" \ + +task.normalize=false \ + +task.append_source_id=true \ + \ + +model.dropout=0.2 \ + +model.attention_dropout=0.1 \ + model.activation_dropout=0.1 \ + model.decoder_layerdrop=0 \ + model.layerdrop=0 \ + model.freeze_finetune_updates=0 \ + \ + model.w2v_path=$w2v_path \ + +model.no_pretrained_weights=true \ + +model.load_pretrained_mbart_from=$mbart_path \ + +model.share_enc_dec_embeddings=true \ + +model.share_s2t_t2t_embeddings=false \ + +model.use_rel_pos_enc=false \ + \ + dataset.train_subset="train" \ + dataset.valid_subset="valid" \ + dataset.num_workers=4 \ + dataset.max_tokens=2000 \ + \ + optimization.max_epoch=50 \ + optimization.clip_norm=5 \ + optimization.max_update=200000 \ + lr_scheduler.total_num_update=200000 \ + \ + checkpoint.save_interval=1 \ + checkpoint.save_interval_updates=5000 \ + checkpoint.keep_last_epochs=5 \ + checkpoint.keep_best_checkpoints=5 \ + \ + 
common.seed=222 \ + common.log_interval=100 \ + common.log_format="json" \ + \ + checkpoint.best_checkpoint_metric="accuracy" \ + checkpoint.maximize_best_checkpoint_metric=true \ + common.tensorboard_logdir=$SAVE_DIR \ + checkpoint.save_dir=$SAVE_DIR \ + hydra.run.dir=$SAVE_DIR \ + hydra.job.name=$exp_name + diff --git a/SpeechT5/YiTrans/exp_scripts/finetune_ST/en-de/jtst_pt36s2_mustc.sh b/SpeechT5/YiTrans/exp_scripts/finetune_ST/en-de/jtst_pt36s2_mustc.sh new file mode 100644 index 0000000000000000000000000000000000000000..df763ee898c778504827bc77607496e283fea114 --- /dev/null +++ b/SpeechT5/YiTrans/exp_scripts/finetune_ST/en-de/jtst_pt36s2_mustc.sh @@ -0,0 +1,83 @@ +world_size=$1 +update_freq=$2 +[ -z $world_size ] && world_size=8 +[ -z $update_freq ] && update_freq=4 + +DATA_DIR=/mnt/default/lozhou/speechdata/st_data/en-de/com2-ende-newmt +EXP_NAME="jt_st_mustc_large_stage2_300k_11sets" +SAVE_DIR=/mnt/default/v-ziqzhang/data/iwslt/st_en-de_v4/${EXP_NAME} +retain_dict=/mnt/default/v-junyiao/dataset/iwslt/en-de/released/analyse/index_asr_st_onlyMUSTC +W2V_PATH1=/mnt/default/v-junyiao/speechexp/train_speech_text_joint_addadaptor_bpecode_large_step1_mbartpt_400k/checkpoint_last.pt +W2V_PATH2=/mnt/default/v-junyiao/speechexp/fairseq_mlst/train_speech_text_joint_adaptor_large_step2_300k/checkpoint_last.pt +mkdir -p ${SAVE_DIR} + +FAIRSEQ_ROOT=/mnt/default/v-ziqzhang/code/fairseq_mlst + +python $FAIRSEQ_ROOT/fairseq_cli/train.py ${DATA_DIR} \ + --save-dir ${SAVE_DIR} \ + --user-dir examples/speech_text_joint_to_text \ + --task speech_text_joint_to_text \ + --config-yaml config_step1_39k.yaml \ + --train-subset "train_11set_st_addsrc" \ + --valid-subset "dev_mustc2_en_de_addsrc_st" \ + --fp16 \ + --seed 1 \ + \ + --ddp-backend no_c10d \ + --distributed-world-size ${world_size} \ + --tensorboard-logdir ${SAVE_DIR} \ + \ + --criterion guided_label_smoothed_cross_entropy_with_accuracy \ + --label-smoothing 0.3 \ + --guide-alpha 0.8 \ + --disable-text-guide-update-num 5000 \ + --attentive-cost-regularization 0.02 \ + \ + --optimizer adam \ + --clip-norm 1.0 \ + --lr 5e-05 \ + --lr-scheduler polynomial_decay --warmup-updates 5000 \ + --warmup-updates 5000 \ + --max-update 200000 \ + --total-num-update 200000 \ + --update-freq ${update_freq} \ + \ + --max-tokens 450000 \ + --max-sentences 3 \ + --max-tokens-valid 500000 \ + --max-source-positions 450000 \ + --skip-invalid-size-inputs-valid-test \ + --num-workers 0 \ + --save-interval 1 \ + --log-format json \ + --log-interval 100 \ + --best-checkpoint-metric "acc" \ + --maximize-best-checkpoint-metric \ + \ + --arch "hubert_st2t" \ + --w2v-path ${W2V_PATH1} \ + --load-step2-model-from ${W2V_PATH2} \ + --no-pretrained-weights \ + --add-decoder \ + --reuse-text-emb \ + --layerdrop 0.1 \ + --activation-dropout 0.1 \ + --decoder-layerdrop 0.1 \ + --freeze-finetune-updates 0 \ + --feature-grad-mult 1.0 \ + --retain-dict-path ${retain_dict} \ + --share-decoder-input-output-embed \ + --share-speech-text-embeddings \ + \ + --save-interval-updates 2000 \ + --keep-interval-updates 5 \ + --keep-interval-updates-pattern 10000 \ + --keep-last-epochs 5 \ + \ + 2>&1 | tee ${SAVE_DIR}/train.log + +sleep 5s + + # --lr-scheduler inverse_sqrt \ + # --load-step2-model-from ${W2V_PATH2} \ + # --no-pretrained-weights \ diff --git a/SpeechT5/YiTrans/exp_scripts/pretrain/pretrain_pt36_adaptor_step1.sh b/SpeechT5/YiTrans/exp_scripts/pretrain/pretrain_pt36_adaptor_step1.sh new file mode 100644 index 
0000000000000000000000000000000000000000..8b7a55a93f6a2d9e3a002ff9e1d8958676e46ed3 --- /dev/null +++ b/SpeechT5/YiTrans/exp_scripts/pretrain/pretrain_pt36_adaptor_step1.sh @@ -0,0 +1,46 @@ +export HYDRA_FULL_ERROR=1 +YiTrans=/home/v-ziqzhang/Code/SpeechT5/YiTrans +DATA_DIR=/mnt/default/lozhou/speechdata/hubert_data +LABEL_DIR=${DATA_DIR}/layer9_k500_label +SP_PATH=${LABEL_DIR}/spm_unigram8000.model +TEXT_DATA_DIR=/mnt/default/lozhou/speechdata/text_data/v3/bin_idx_step1 +EXP_NAME=pretrain_pt36_addadaptor_bpecode_large_step1 +SAVE_DIR=${HOME}/data/speechexp/${EXP_NAME} +W2V_PATH=${HOME}/data/speechexp/hubert_large_librivox_released/checkpoint_last.pt +MBART_PATH=${HOME}/data/speechexp/mbart50.pretrained/model.pt + +python ${YiTrans}/fairseq/fairseq_cli/hydra_train.py \ + --config-dir ${YiTrans}/yitrans_iwslt22/config/pretrain \ + --config-name joint_large \ + common.user_dir=${YiTrans}/yitrans_iwslt22 \ + \ + task.data=$DATA_DIR \ + task.labels='["km"]' \ + task.label_dir=$LABEL_DIR \ + task.text_cfg.text_data=$TEXT_DATA_DIR \ + +task.hubert_tokenizer="sentencepiece" \ + +task.sp_path=${SP_PATH} \ + \ + model.label_rate=50 \ + model.encoder_layers=12 \ + +model.load_pretrained_w2v_from=${W2V_PATH} \ + +model.load_pretrained_mbart_from=${MBART_PATH} \ + \ + dataset.train_subset=\"train_LS,train_MUSTC+mono_deduped_filt_sort.en_XX.en_XX,mt8corpus_filt_slct.en_XX-de_DE\" \ + dataset.valid_subset=\"dev_MUSTC+valid.en_XX-de_DE,dev_MUSTC+valid.en_XX-ja_XX,dev_MUSTC+valid.en_XX-zh_CN,dev_MUSTC+dev4x.en_XX.en_XX\" \ + dataset.max_tokens=300000 \ + \ + distributed_training.distributed_world_size=8 \ + distributed_training.nprocs_per_node=8 \ + optimization.update_freq=[2] \ + \ + common.tensorboard_logdir=$SAVE_DIR \ + checkpoint.save_dir=$SAVE_DIR \ + hydra.run.dir=$SAVE_DIR \ + hydra.job.name=$EXP_NAME \ + checkpoint.reset_optimizer=true \ + checkpoint.reset_dataloader=true + + + + # dataset.train_subset=\"train_CV,train_EUR,train_LS,train_MUSTC,train_TEDLIUM,train_VP+mono_deduped_filt_sort.en_XX.en_XX,mt8corpus_filt_slct.en_XX-de_DE,mt8corpus_filt_slct.en_XX-ja_XX,mt8corpus_filt_slct.en_XX-zh_CN\" \ diff --git a/SpeechT5/YiTrans/exp_scripts/pretrain/pretrain_pt36_adaptor_step2.sh b/SpeechT5/YiTrans/exp_scripts/pretrain/pretrain_pt36_adaptor_step2.sh new file mode 100644 index 0000000000000000000000000000000000000000..756b93a7c2c7e8b20ef9551bdab2f9863388bb9c --- /dev/null +++ b/SpeechT5/YiTrans/exp_scripts/pretrain/pretrain_pt36_adaptor_step2.sh @@ -0,0 +1,45 @@ +EXP_NAME=train_speech_text_joint_adaptor_large_step2_300k +SAVE_DIR=/datablob/users/v-junyiao/speechexp/fairseq_mlst/${EXP_NAME} +DATA_ROOT=/datablob/users/v-junyiao/speechdata/hubert_mlst +LABEL_DIR=${DATA_ROOT}/fine-tune_en_bpe250k_full +W2V_PATH=/mnt/default/v-junyiao/speechexp/train_speech_text_joint_addadaptor_bpecode_large_step1_mbartpt_400k/checkpoint_last_up.pt +TEXT_DATA_DIR=/datablob/users/v-junyiao/speechdata/text_data/v4/bin-idx +SP_PATH=${LABEL_DIR}/sentence.bpe.model +# export CUDA_VISIBLE_DEVICES=1 +python fairseq_cli/hydra_train.py \ + --config-dir examples/hubert/config/pretrain \ + --config-name pretrain_step2 \ + distributed_training.distributed_world_size=64 \ + distributed_training.nprocs_per_node=8 \ + \ + 
dataset.train_subset=\"train_COVOST,train_asr_VP,train_punc_TEDLIUM,train_asr_MUSTC,train_punc_LS,train_asr_EUR+covost2.en_XX-ja_XX,covost2.en_XX-zh_CN,covost_eurST.en_XX-de_DE,mt8corpus_domain45.en_XX-ja_XX,mt8corpus_filt_slct80_domain44.en_XX-de_DE,mt8corpus_filt_slct80_domain40.en_XX-zh_CN,train.en_XX-de_DE,train.en_XX-ja_XX,train.en_XX-zh_CN\" \
+    dataset.valid_subset=\"dev_asr_MUSTC+valid.en_XX-de_DE,dev_asr_MUSTC+valid.en_XX-ja_XX,dev_asr_MUSTC+valid.en_XX-zh_CN\" \
+    dataset.max_tokens=480001 \
+    dataset.num_workers=0 \
+    optimization.update_freq=[1] \
+    optimization.max_update=300000 \
+    \
+    task.hubert_tokenizer="sentencepiece" \
+    task.sp_path=${SP_PATH} \
+    task.max_keep_size=480000 \
+    +task.split_modality_batch=true \
+    +task.speech_tgt_lang="en_XX" \
+    +task.mbart_style_lang_id=true \
+    +task.text_sampling_alpha=1.0 \
+    +task.store_labels=true \
+    model.freeze_finetune_updates=15000 \
+    criterion.dec_weight=0.5 \
+    +model.reuse_text_emb=true \
+    +model.share_ctc_decoder_embed=true \
+    +model.share_speech_text_embeddings=true \
+    \
+    task.data=${DATA_ROOT} \
+    task.label_dir=${LABEL_DIR} \
+    task.text_cfg.text_data=${TEXT_DATA_DIR} \
+    model.w2v_path=${W2V_PATH} \
+    checkpoint.save_dir=${SAVE_DIR} \
+    common.tensorboard_logdir=${SAVE_DIR} \
+    hydra.run.dir=${SAVE_DIR} \
+    hydra.job.name=${EXP_NAME}
+
+sleep infinity
diff --git a/SpeechT5/YiTrans/readme.md b/SpeechT5/YiTrans/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..ea957c46fb011286fcee55516efcbebb42b01001
--- /dev/null
+++ b/SpeechT5/YiTrans/readme.md
@@ -0,0 +1,98 @@
+# YiTrans@IWSLT22
+
+> [**YiTrans**](https://arxiv.org/abs/2206.05777) (```IWSLT 2022```): **The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task**
+> Code is being merged into this repository; thanks for your attention.
+
+## Setup
+```bash
+git clone https://github.com/microsoft/SpeechT5.git
+git submodule update --init YiTrans/fairseq
+cd YiTrans/fairseq
+pip install -e .
+```
+
+## Data Preparation
+### Speech/ASR data for pre-training
+Please follow the HuBERT data preparation steps [here](https://github.com/facebookresearch/fairseq/tree/main/examples/hubert#data-preparation).
+### Monolingual text data for pre-training
+Please follow the mBART data preparation steps [here](https://github.com/facebookresearch/fairseq/tree/main/examples/mbart). We reuse the multilingual vocabulary.
+Once your subset.{idx,bin} files are ready, rename them as subset.lang.lang.{idx,bin}, e.g.
+```
+mono_deduped_filt_sort.en_XX.en_XX.bin
+mono_deduped_filt_sort.en_XX.en_XX.idx
+```
+### Bilingual text data for pre-training
+Prepare bilingual data the same way as monolingual data, except that both the source and the target languages are needed. Rename the files as subset.src-tgt.{src,tgt}.{idx,bin}, e.g.
+```
+mt8corpus_filt_slct.en_XX-de_DE.de_DE.bin
+mt8corpus_filt_slct.en_XX-de_DE.de_DE.idx
+mt8corpus_filt_slct.en_XX-de_DE.en_XX.bin
+mt8corpus_filt_slct.en_XX-de_DE.en_XX.idx
+```
+
+### ST data for fine-tuning
+Please follow the S2T data preparation steps [here](https://github.com/pytorch/fairseq/blob/main/examples/speech_to_text/docs/mustc_example.md). Your tsv file should look like this:
+```
+id audio n_frames tgt_text speaker src_text src_lang tgt_lang
+ted_1_0 /mnt/speechdata/MUSTC/en-de/flac/ted_1_0.flac 25920 Hinter mir war gar keine Autokolonne. spk.1 There was no motorcade back there. en_XX de_DE
+ted_1_1 /mnt/speechdata/MUSTC/en-de/flac/ted_1_1.flac 219359 Haben Sie schon mal vom Phantomschmerz gehört? (Lachen) Wir saßen in einem gemieteten Ford Taurus. spk.1 (Laughter) You've heard of phantom limb pain? (Laughter) en_XX de_DE
+ted_1_2 /mnt/speechdata/MUSTC/en-de/flac/ted_1_2.flac 71360 Es war Zeit zum Abendessen und wir hielten Ausschau nach einem Restaurant. spk.1 It was dinnertime, and we started looking for a place to eat. en_XX de_DE
+```
+
+
+
+## Pre-train
+To pre-train the PT36 model, for example, please follow these steps:
+
+Step 0: Download the released [Hubert model](https://dl.fbaipublicfiles.com/hubert/hubert_large_ll60k.pt) and [mBART model](https://dl.fbaipublicfiles.com/fairseq/models/mbart50/mbart50.pretrained.tar.gz).
+
+Step 1: Pre-training with unlabeled speech data and monolingual/bilingual text data
+```bash
+bash YiTrans/exp_scripts/pretrain/pretrain_pt36_adaptor_step1.sh
+```
+
+Step 2: Pre-training with ASR data and domain-filtered bilingual text data
+```bash
+bash YiTrans/exp_scripts/pretrain/pretrain_pt36_adaptor_step2.sh
+```
+Other configurations, such as training PT48, can also be found in ./YiTrans/exp_scripts/pretrain; you might need to modify the PATH variables in the .sh files to match your data.
+
+## Fine-tune
+For example, to fine-tune the En-De ST model on the MuST-C dataset,
+```bash
+bash YiTrans/exp_scripts/finetune_ST/en-de/jtst_pt36s2_mustc.sh
+```
+Other configurations, such as different translation directions or datasets, can be found in ./YiTrans/exp_scripts/finetune_ST; you might need to modify the PATH variables in the .sh files to match your data.
+
+## Cascaded system
+You can also build a cascaded ST system (ASR+MT) with our codebase.
+1. ASR model: fine-tune from the combination of [Hubert Large](https://dl.fbaipublicfiles.com/hubert/hubert_large_ll60k.pt) and the [mBART model](https://dl.fbaipublicfiles.com/fairseq/models/mbart50/mbart50.pretrained.tar.gz):
+    ```bash
+    # change the mbart_path/hubert_path to your own in the *.sh
+    bash YiTrans/exp_scripts/finetune_ASR/finetune_hubert24_mbart24_en.sh
+    ```
+    Check the [`.sh`](exp_scripts/finetune_ASR/finetune_hubert24_mbart24_en.sh) file for more information about the configuration.
+
+2. MT model: fine-tune from [mBART model](https://dl.fbaipublicfiles.com/fairseq/models/mbart50/mbart50.pretrained.tar.gz):
+
+    ```bash
+    # change the mbart_path to your own in the *.sh
+    bash YiTrans/exp_scripts/finetune_MT/finetune_mbart_en-de.sh
+    ```
+    Check the [`.sh`](exp_scripts/finetune_MT/finetune_mbart_en-de.sh) file for more information about the configuration.
+
+
+## Reference
+
+If you find our work useful in your research, please cite the following paper:
+
+```bibtex
+@article{Zhang2022Yitrans,
+  title = {The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task},
+  author = {Zhang, Ziqiang and Ao, Junyi and Zhou, Long and Liu, Shujie and Wei, Furu and Li, Jinyu},
+  eprint={2206.05777},
+  archivePrefix={arXiv},
+  primaryClass={cs.CL},
+  year={2022}
+}
+```
diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/__init__.py b/SpeechT5/YiTrans/yitrans_iwslt22/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..97327d269e93a13cd135f6c1a187fd820a8decb8
--- /dev/null
+++ b/SpeechT5/YiTrans/yitrans_iwslt22/__init__.py
@@ -0,0 +1 @@
+from . 
import data, tasks, criterions, models diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/config/finetune_asr/large_mustc.yaml b/SpeechT5/YiTrans/yitrans_iwslt22/config/finetune_asr/large_mustc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce9052e29d72402ef4b01340b0d692dce24ee6df --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/config/finetune_asr/large_mustc.yaml @@ -0,0 +1,103 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + no_epoch_checkpoints: false + best_checkpoint_metric: dec_accuracy + restore_file: checkpoint_last.pt + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: 1 + distributed_port: -1 + nprocs_per_node: 8 + +task: + _name: iwslt_joint_pretraining + data: ??? + fine_tuning: true + label_dir: ??? + normalize: true # must be consistent with pre-training + labels: ["ltr"] + single_target: true + add_decoder: true + pad_audio: true + random_crop: false + max_keep_size: 480000 + hubert_tokenizer: "none" + sp_path: None + +dataset: + num_workers: 6 + max_tokens: 1280000 + skip_invalid_size_inputs_valid_test: true + train_subset: train_100 + valid_subset: dev_other + required_batch_size_multiple: 1 + +criterion: + _name: ctc_ce + zero_infinity: true + dec_weight: 1.0 + +optimization: + max_update: 80000 + lr: [0.00003] + sentence_avg: true + update_freq: [1] + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-08 + weight_decay: 0.0 + +lr_scheduler: + _name: tri_stage + phase_ratio: [0.1, 0.4, 0.5] + final_lr_scale: 0.05 + +model: + _name: yitrans_asr + w2v_path: ??? + apply_mask: true + mask_prob: 0.5 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + decoder_layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + add_decoder: true + share_decoder_input_output_embed: true + + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + - criterion.wer_kenlm_model + - criterion.wer_lexicon + run: + dir: ??? + sweep: + dir: ??? 
+ subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/config/finetune_mt/mt_translation.yaml b/SpeechT5/YiTrans/yitrans_iwslt22/config/finetune_mt/mt_translation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0d02d4ab8ea29bcd0b2eb8833c8876f88de99dd --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/config/finetune_mt/mt_translation.yaml @@ -0,0 +1,89 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + tensorboard_logdir: tblog + seed: 1337 + +checkpoint: + save_interval: 1000000 + keep_last_epochs: 5 + save_interval_updates: 10000 + keep_interval_updates_pattern: 20000 + keep_interval_updates: 5 + keep_best_checkpoints: 5 + best_checkpoint_metric: accuracy + maximize_best_checkpoint_metric: true + +distributed_training: + ddp_backend: legacy_ddp + find_unused_parameters: true + distributed_world_size: -1 + nprocs_per_node: 8 + + +criterion: + _name: "label_smoothed_cross_entropy" + label_smoothing: 0.2 + report_accuracy: true + + +task: + _name: "iwslt_translation_from_pretrained" + +dataset: + num_workers: 6 + max_tokens: 3200000 + skip_invalid_size_inputs_valid_test: true + validate_after_updates: ${model.freeze_finetune_updates} + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + train_subset: train_100 + valid_subset: dev_other + required_batch_size_multiple: 1 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.0 + +lr_scheduler: + lr: [0.0001] + _name: polynomial_decay + warmup_updates: 5000 + total_num_update: 200000 + +model: + _name: finetune_mt + w2v_path: ??? + apply_mask: true + mask_prob: 0.65 + mask_channel_prob: 0.5 + mask_channel_length: 64 + layerdrop: 0.1 + decoder_layerdrop: 0.1 + activation_dropout: 0.1 + feature_grad_mult: 0.0 + freeze_finetune_updates: 0 + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + - model.w2v_path + - dataset.train_subset + - dataset.valid_subset + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/config/pretrain/joint_base.yaml b/SpeechT5/YiTrans/yitrans_iwslt22/config/pretrain/joint_base.yaml new file mode 100644 index 0000000000000000000000000000000000000000..deb7cda6c7571dc4ab25d63da0110c2e11c35a3d --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/config/pretrain/joint_base.yaml @@ -0,0 +1,134 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 1 + keep_last_epochs: 1 + save_interval_updates: 5000 + keep_interval_updates: -1 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 32 + distributed_port: 29671 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: iwslt_joint_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: false # must be consistent with extractor + add_decoder: false + text_cfg: + seed: ${common.seed} + text_data: ??? 
+ data_config: config.yaml + sample_break_mode: eos + tokens_per_sample: 512 + shorten_method: "random_crop" + text_maxtokens_ratio: 1.0 + + +dataset: + num_workers: 6 + max_tokens: 1400000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 1 + +criterion: + _name: hubert + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + label_smoothing: 0.1 + +optimization: + max_update: 800000 + lr: [0.0001] + clip_norm: 10.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: hubert + label_rate: ??? + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: default + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + final_dim: 256 + encoder_layerdrop: 0.05 + decoder_layerdrop: 0.05 + dropout_input: 0.1 + dropout_features: 0.1 + dropout: 0.1 + attention_dropout: 0.1 + feature_grad_mult: 0.1 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + adaptive_input: ${model.adaptive_input} + max_source_positions: ${task.text_cfg.tokens_per_sample} + checkpoint_activations: ${model.checkpoint_activations} + no_scale_embedding: false + layernorm_embedding: false + quant_noise: + pq: ${model.quant_noise_pq} + encoder: + embed_dim: 768 + ffn_embed_dim: 3072 + layers: 6 + attention_heads: 12 + normalize_before: false + learned_pos: false + layerdrop: ${model.encoder_layerdrop} + + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/config/pretrain/joint_large.yaml b/SpeechT5/YiTrans/yitrans_iwslt22/config/pretrain/joint_large.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dbacae9a15ba5c2ff34081d200f280b2a408f752 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/config/pretrain/joint_large.yaml @@ -0,0 +1,159 @@ +# @package _group_ + +common: + fp16: true + log_format: json + log_interval: 200 + seed: 1337 + tensorboard_logdir: tblog + +checkpoint: + save_dir: ??? + save_interval: 1 + keep_last_epochs: 10 + save_interval_updates: 10000 + keep_interval_updates: -1 + # no_epoch_checkpoints: true + +distributed_training: + ddp_backend: no_c10d + distributed_backend: 'nccl' + distributed_world_size: 32 + distributed_port: 29671 + nprocs_per_node: 8 + find_unused_parameters: true + +task: + _name: iwslt_joint_pretraining + data: ??? + label_dir: ??? + labels: ??? + label_rate: ${model.label_rate} + sample_rate: 16000 + max_sample_size: 250000 + min_sample_size: 32000 + pad_audio: false + random_crop: true + normalize: true # must be consistent with extractor + add_decoder: true + split_modality_batch: true + store_labels: true + text_cfg: + seed: ${common.seed} + text_data: ??? 
+ data_config: config.yaml + sample_break_mode: eos + tokens_per_sample: 1024 + shorten_method: "random_crop" + text_maxtokens_ratio: 1.0 + mask_whole_words: true + +dataset: + num_workers: 4 + max_tokens: 900000 + skip_invalid_size_inputs_valid_test: true + validate_interval: ${checkpoint.save_interval} + validate_interval_updates: ${checkpoint.save_interval_updates} + required_batch_size_multiple: 1 + +criterion: + _name: joint_step1_split_batch + pred_masked_weight: 1.0 + pred_nomask_weight: 0.0 + loss_weights: [10,] + label_smoothing: 0.02 + +optimization: + max_update: 400000 + lr: [0.00003] + clip_norm: 1.0 + +optimizer: + _name: adam + adam_betas: (0.9,0.98) + adam_eps: 1e-06 + weight_decay: 0.01 + +lr_scheduler: + _name: polynomial_decay + warmup_updates: 32000 + +model: + _name: joint_ed + label_rate: ??? + encoder_layers: 24 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + final_dim: 768 + skip_masked: false + skip_nomask: false + mask_prob: 0.80 + extractor_mode: layer_norm + conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2' + encoder_layerdrop: 0.0 + dropout_input: 0.0 + dropout_features: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + layer_norm_first: true + feature_grad_mult: 1.0 + untie_final_proj: true + activation_dropout: 0.0 + use_rel_pos_enc: true + decoder_layers: 12 + decoder_embed_dim: 1024 + decoder_ffn_embed_dim: 4096 + decoder_attention_heads: 16 + decoder_output_dim: 1024 + decoder_normalize_before: true + layernorm_embedding: true + decoder_learned_pos: true + share_decoder_input_output_embed: true + share_enc_dec_embeddings: true + max_target_positions: 1024 + activation_fn: "gelu" + adaptive_input: false + checkpoint_activations: false + quant_noise_pq: 0 + add_text_modality: true + add_text_encoder: true + add_adaptor: true + + text_transformer: + activation_fn: ${model.activation_fn} + dropout: ${model.dropout} + attention_dropout: ${model.attention_dropout} + activation_dropout: ${model.activation_dropout} + adaptive_input: ${model.adaptive_input} + max_source_positions: ${task.text_cfg.tokens_per_sample} + checkpoint_activations: ${model.checkpoint_activations} + no_scale_embedding: false + layernorm_embedding: true + quant_noise: + pq: ${model.quant_noise_pq} + encoder: + embed_dim: 1024 + ffn_embed_dim: 4096 + layers: 12 + attention_heads: 16 + normalize_before: true + learned_pos: true + layerdrop: ${model.encoder_layerdrop} + + +hydra: + job: + config: + override_dirname: + kv_sep: '-' + item_sep: '__' + exclude_keys: + - run + - task.data + - task.label_dir + run: + dir: ??? + sweep: + dir: ??? + subdir: ${hydra.job.config_name}__${hydra.job.override_dirname} diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/criterions/__init__.py b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bb260356b113bc05c3556213a15a337a4513c42f --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/__init__.py @@ -0,0 +1,9 @@ +import importlib +import os + +for file in os.listdir(os.path.dirname(__file__)): + if file.endswith(".py") and not file.startswith("_"): + criterion_name = file[: file.find(".py")] + importlib.import_module( + "yitrans_iwslt22.criterions." 
+ criterion_name + ) diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/criterions/ctc_ce.py b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/ctc_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..40fab26b8db594f980541fa7e2d197b9329f1a40 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/ctc_ce.py @@ -0,0 +1,414 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import math +from argparse import Namespace +from dataclasses import dataclass, field +from omegaconf import II +from typing import Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.dataclass import FairseqDataclass +from fairseq.data.data_utils import post_process +from fairseq.tasks import FairseqTask +from fairseq.logging.meters import safe_round + + +@dataclass +class CtcCeCriterionConfig(FairseqDataclass): + zero_infinity: bool = field( + default=False, + metadata={"help": "zero inf loss when source length <= target length"}, + ) + sentence_avg: bool = II("optimization.sentence_avg") + post_process: str = field( + default="letter", + metadata={ + "help": "how to post process predictions into words. can be letter, " + "wordpiece, BPE symbols, etc. 
" + "See fairseq.data.data_utils.post_process() for full list of options" + }, + ) + wer_kenlm_model: Optional[str] = field( + default=None, + metadata={ + "help": "if this is provided, use kenlm to compute wer (along with other wer_* args)" + }, + ) + wer_lexicon: Optional[str] = field( + default=None, + metadata={"help": "lexicon to use with wer_kenlm_model"}, + ) + wer_lm_weight: float = field( + default=2.0, + metadata={"help": "lm weight to use with wer_kenlm_model"}, + ) + wer_word_score: float = field( + default=-1.0, + metadata={"help": "lm word score to use with wer_kenlm_model"}, + ) + + wer_args: Optional[str] = field( + default=None, + metadata={ + "help": "DEPRECATED: tuple of (wer_kenlm_model, wer_lexicon, wer_lm_weight, wer_word_score)" + }, + ) + + dec_weight: float = field( + default=0.5, + metadata={"help": "weights for decoder CE Loss, loss will be ((1 - dec_weight) * hubert_loss + dec_weight * CE_Loss)"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + label_smoothing: float = field( + default=0.1, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + + +@register_criterion("ctc_ce", dataclass=CtcCeCriterionConfig) +class CtcCeCriterion(FairseqCriterion): + def __init__(self, cfg: CtcCeCriterionConfig, task: FairseqTask): + super().__init__(task) + self.blank_idx = ( + task.target_dictionary.index(task.blank_symbol) + if hasattr(task, "blank_symbol") + else 0 + ) + self.pad_idx = task.target_dictionary.pad() + self.eos_idx = task.target_dictionary.eos() + self.post_process = cfg.post_process + + if cfg.wer_args is not None: + ( + cfg.wer_kenlm_model, + cfg.wer_lexicon, + cfg.wer_lm_weight, + cfg.wer_word_score, + ) = eval(cfg.wer_args) + + if cfg.wer_kenlm_model is not None: + from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder + + dec_args = Namespace() + dec_args.nbest = 1 + dec_args.criterion = "ctc" + dec_args.kenlm_model = cfg.wer_kenlm_model + dec_args.lexicon = cfg.wer_lexicon + dec_args.beam = 50 + dec_args.beam_size_token = min(50, len(task.target_dictionary)) + dec_args.beam_threshold = min(50, len(task.target_dictionary)) + dec_args.lm_weight = cfg.wer_lm_weight + dec_args.word_score = cfg.wer_word_score + dec_args.unk_weight = -math.inf + dec_args.sil_weight = 0 + + self.w2l_decoder = W2lKenLMDecoder(dec_args, task.target_dictionary) + else: + self.w2l_decoder = None + + self.zero_infinity = cfg.zero_infinity + self.sentence_avg = cfg.sentence_avg + + self.dec_weight = cfg.dec_weight + self.report_accuracy = cfg.report_accuracy + self.ignore_prefix_size = cfg.ignore_prefix_size + self.eps = cfg.label_smoothing + + def forward(self, model, sample, reduce=True): + net_output = model(**sample["net_input"]) + lprobs = model.get_normalized_probs( + net_output, log_probs=True + ).contiguous() # (T, B, C) from the encoder + + if "src_lengths" in sample["net_input"]: + input_lengths = sample["net_input"]["src_lengths"] + else: + if net_output["padding_mask"] is not None: + non_padding_mask = ~net_output["padding_mask"] + input_lengths = non_padding_mask.long().sum(-1) + else: + input_lengths = lprobs.new_full( + (lprobs.size(1),), lprobs.size(0), dtype=torch.long + ) + + pad_mask = (sample["target"] != self.pad_idx) & ( + sample["target"] != self.eos_idx + ) + targets_flat = sample["target"].masked_select(pad_mask) + if "target_lengths" in sample: + 
target_lengths = sample["target_lengths"] + else: + target_lengths = pad_mask.sum(-1) + + with torch.backends.cudnn.flags(enabled=False): + loss = F.ctc_loss( + lprobs, + targets_flat, + input_lengths, + target_lengths, + blank=self.blank_idx, + reduction="sum", + zero_infinity=self.zero_infinity, + ) + + ntokens = ( + sample["ntokens"] if "ntokens" in sample else target_lengths.sum().item() + ) + + sample_size = sample["target"].size(0) if self.sentence_avg else ntokens + + logging_output = {} + if "decoder_target" in sample: + if net_output["decoder_out"] is not None: + dec_sample_size = sample["target"].size(0) if self.sentence_avg else sample["dec_ntokens"] + dec_loss, dec_nll_loss = self.compute_ce_loss(model, net_output["decoder_out"], sample, reduce=reduce) + logging_output["ctc_loss"] = loss.item() + loss = (1 - self.dec_weight) * loss + (self.dec_weight * dec_loss * sample_size / dec_sample_size) + logging_output["dec_loss"] = dec_loss.item() + logging_output["dec_nll_loss"] = dec_nll_loss.item() + logging_output["dec_sample_size"] = dec_sample_size + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output["decoder_out"], sample) + logging_output["dec_n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + else: + logging_output["ctc_loss"] = loss.item() + loss = (1 - self.dec_weight) * loss + logging_output["dec_loss"] = 0 + logging_output["dec_nll_loss"] = 0 + logging_output["dec_sample_size"] = 1 + if self.report_accuracy: + logging_output["dec_n_correct"] = 0 + logging_output["total"] = 1 + + logging_output = { + "loss": utils.item(loss.data), # * sample['ntokens'], + "ntokens": ntokens, + "nsentences": sample["id"].numel(), + "sample_size": sample_size, + **logging_output, + } + + if not model.training and self.dec_weight < 1.0: + import editdistance + + with torch.no_grad(): + lprobs_t = lprobs.transpose(0, 1).float().contiguous().cpu() + + c_err = 0 + c_len = 0 + w_errs = 0 + w_len = 0 + wv_errs = 0 + for lp, t, inp_l in zip( + lprobs_t, + sample["target_label"] + if "target_label" in sample + else sample["target"], + input_lengths, + ): + lp = lp[:inp_l].unsqueeze(0) + + decoded = None + if self.w2l_decoder is not None: + decoded = self.w2l_decoder.decode(lp) + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + + p = (t != self.task.target_dictionary.pad()) & ( + t != self.task.target_dictionary.eos() + ) + targ = t[p] + targ_units = self.task.target_dictionary.string(targ) + targ_units_arr = targ.tolist() + + toks = lp.argmax(dim=-1).unique_consecutive() + pred_units_arr = toks[toks != self.blank_idx].tolist() + + c_err += editdistance.eval(pred_units_arr, targ_units_arr) + c_len += len(targ_units_arr) + + targ_words = post_process(targ_units, self.post_process).split() + + pred_units = self.task.target_dictionary.string(pred_units_arr) + pred_words_raw = post_process(pred_units, self.post_process).split() + + if decoded is not None and "words" in decoded: + pred_words = decoded["words"] + w_errs += editdistance.eval(pred_words, targ_words) + wv_errs += editdistance.eval(pred_words_raw, targ_words) + else: + dist = editdistance.eval(pred_words_raw, targ_words) + w_errs += dist + wv_errs += dist + + w_len += len(targ_words) + + logging_output["wv_errors"] = wv_errs + logging_output["w_errors"] = w_errs + logging_output["w_total"] = w_len + logging_output["c_errors"] = c_err + logging_output["c_total"] = c_len + + 
return loss, sample_size, logging_output + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.pad_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.pad_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + target = sample["decoder_target"] + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs)) + nsentences = utils.item( + sum(log.get("nsentences", 0) for log in logging_outputs) + ) + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar("ntokens", ntokens) + metrics.log_scalar("nsentences", nsentences) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + + c_errors = sum(log.get("c_errors", 0) for log in logging_outputs) + metrics.log_scalar("_c_errors", c_errors) + c_total = sum(log.get("c_total", 0) for log in logging_outputs) + metrics.log_scalar("_c_total", c_total) + w_errors = sum(log.get("w_errors", 0) for log in logging_outputs) + metrics.log_scalar("_w_errors", w_errors) + wv_errors = sum(log.get("wv_errors", 0) for log in logging_outputs) + metrics.log_scalar("_wv_errors", wv_errors) + w_total = sum(log.get("w_total", 0) for log in logging_outputs) + metrics.log_scalar("_w_total", w_total) + + if c_total > 0: + metrics.log_derived( + "uer", + lambda meters: safe_round( + meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3 + ) + if meters["_c_total"].sum > 0 + else float("nan"), + ) + if w_total > 0: + metrics.log_derived( + "wer", + lambda meters: safe_round( + meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + metrics.log_derived( + "raw_wer", + lambda meters: safe_round( + meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + + if "dec_loss" in logging_outputs[0]: + ctc_loss_sum = sum(log.get("ctc_loss", 0) for log in logging_outputs) + dec_loss_sum = sum(log.get("dec_loss", 0) for log in logging_outputs) + dec_nll_loss_sum = sum(log.get("dec_nll_loss", 0) for log in logging_outputs) + dec_sample_size = sum(log.get("dec_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "dec_loss", dec_loss_sum / dec_sample_size / 
math.log(2), dec_sample_size, round=3 + ) + metrics.log_scalar( + "ctc_loss", ctc_loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar( + "dec_nll_loss", dec_nll_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_derived( + "dec_ppl", lambda meters: utils.get_perplexity(meters["dec_nll_loss"].avg) + ) + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("dec_n_correct", n_correct) + metrics.log_derived( + "dec_accuracy", + lambda meters: round( + meters["dec_n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return True diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/criterions/joint_step1_criterion.py b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/joint_step1_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..2398de156affc681116e12e128123986ac21835f --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/joint_step1_criterion.py @@ -0,0 +1,366 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +import math +import re +from dataclasses import dataclass, field +from typing import List, Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.dataclass import FairseqDataclass + +logger = logging.getLogger(__name__) +@dataclass +class JointCriterionConfig(FairseqDataclass): + pred_masked_weight: float = field( + default=1.0, + metadata={"help": "weight for predictive loss for masked frames"}, + ) + pred_nomask_weight: float = field( + default=0.0, + metadata={"help": "weight for predictive loss for unmasked frames"}, + ) + loss_weights: Optional[List[float]] = field( + default=None, + metadata={"help": "weights for additional loss terms (not first one)"}, + ) + log_keys: List[str] = field( + default_factory=lambda: [], + metadata={"help": "output keys to log"}, + ) + dec_weight: float = field( + default=1.0, + metadata={"help": "weights for decoder CE Loss, loss will be (hubert_loss + dec_weight * CE_Loss)"}, + ) + text_weight: float = field( + default=1.0, + metadata={"help": "weights for text ED CE Loss, loss will be (hubert_loss + dec_weight * CE_Loss + text_weight * CE_Loss)"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + label_smoothing: float = field( + default=0.0, + 
metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + + +@register_criterion("joint_step1", dataclass=JointCriterionConfig) +class JointCriterion(FairseqCriterion): + def __init__( + self, + task, + pred_masked_weight, + pred_nomask_weight, + loss_weights=None, + log_keys=None, + dec_weight=1.0, + text_weight=1.0, + report_accuracy=False, + ignore_prefix_size=0, + label_smoothing=0.0 + ): + super().__init__(task) + self.pred_masked_weight = pred_masked_weight + self.pred_nomask_weight = pred_nomask_weight + self.loss_weights = loss_weights + self.log_keys = [] if log_keys is None else log_keys + self.dec_weight = dec_weight + self.text_weight = text_weight + self.report_accuracy = report_accuracy + self.ignore_prefix_size = ignore_prefix_size + self.eps = label_smoothing + self.padding_idx = task.dictionaries[0].pad() + + def forward(self, model, sample, reduce=True, log_pred=False): + """Compute the loss for the given sample. + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + + if "speech" in sample.keys(): + text_type = [name for name in sample.keys() if name.startswith("text")] + assert len(text_type) == 1 + text_type = text_type[0] + text_sample = sample[text_type] + sample = sample["speech"] + else: + text_sample = None + + sample["modality"] = "speech" + ### 1. do hubert forward and loss computation + net_output = model(target_list=sample["target_list"], **sample["net_input"]) + loss = 0.0 + sample_size = 0 + logging_output = {} + reduction = "sum" if reduce else "none" + + loss_m_list = [] + logp_m_list = model.get_logits(net_output, True) + targ_m_list = model.get_targets(net_output, True) + assert self.pred_masked_weight == 0 or len(logp_m_list) > 0 + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"loss_m_{i}"] = loss_m.detach().item() + if self.pred_masked_weight > 0: + loss += self.pred_masked_weight * sum(loss_m_list) + sample_size += targ_m_list[0].numel() + + loss_u_list = [] + logp_u_list = model.get_logits(net_output, False) + targ_u_list = model.get_targets(net_output, False) + assert self.pred_nomask_weight == 0 or len(logp_u_list) > 0 + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"loss_u_{i}"] = loss_u.detach().item() + if self.pred_nomask_weight > 0: + loss += self.pred_nomask_weight * sum(loss_u_list) + sample_size += targ_u_list[0].numel() + + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len( + self.loss_weights + ), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss += p + logging_output[f"loss_{n}"] = p.item() + + if "decoder_target" in sample: + dec_sample_size = sample["dec_ntokens"] + dec_loss, dec_nll_loss = self.compute_ce_loss(model, net_output["decoder_out"], sample, 
reduce=reduce) + loss = loss + (self.dec_weight * dec_loss * sample_size / dec_sample_size) + logging_output["dec_loss"] = dec_loss.item() + logging_output["dec_nll_loss"] = dec_nll_loss.item() + logging_output["dec_sample_size"] = dec_sample_size + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output["decoder_out"], sample) + logging_output["dec_n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + + if text_sample is not None: + ### 2. do text forward and loss computation + text_sample["modality"] = "text" + text_net_output = model(**text_sample["net_input"]) + text_dec_loss, text_dec_nll_loss = self.compute_ce_loss(model, text_net_output["decoder_out"], text_sample, reduce=reduce) + text_sample_size = text_sample["ntokens"] + loss = loss + (self.text_weight * text_dec_loss * sample_size / text_sample_size) + logging_output["text_dec_loss"] = text_dec_loss.item() + logging_output["text_dec_nll_loss"] = text_dec_nll_loss.item() + logging_output["text_sample_size"] = text_sample_size + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, text_net_output["decoder_out"], text_sample) + logging_output["text_dec_n_correct"] = utils.item(n_correct.data) + logging_output["text_total"] = utils.item(total.data) + + logging_output = { + "loss": loss.item() if reduce else loss, + "ntokens": sample_size, + "nsentences": sample["id"].numel() + (text_sample["id"].numel() if text_sample is not None else 0), + "sample_size": sample_size, + **logging_output, + } + + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + def compute_correct(logits): + if logits.numel() == 0: + return 0, 0 + else: + assert logits.dim() > 1, logits.shape + max = logits.argmax(-1) == 0 + min = logits.argmin(-1) == 0 + both = max & min + corr = max.long().sum().item() - both.long().sum().item() + count = max.numel() + return corr, count + + with torch.no_grad(): + for i, logp_m in enumerate(logp_m_list): + corr_m, count_m = compute_correct(logp_m) + logging_output[f"correct_m_{i}"] = corr_m + logging_output[f"count_m_{i}"] = count_m + + for i, logp_u in enumerate(logp_u_list): + corr_u, count_u = compute_correct(logp_u) + logging_output[f"correct_u_{i}"] = corr_u + logging_output[f"count_u_{i}"] = count_u + + return loss, sample_size, logging_output + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.padding_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.padding_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + if sample["modality"] == "speech": + target = sample["decoder_target"] + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + 
else: + target = sample["target"] + + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training (copied from normal cross entropy).""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg) + ) + else: + metrics.log_derived( + "ppl", lambda meters: utils.get_perplexity(meters["loss"].avg) + ) + + counts = {} + for lk in logging_outputs[0].keys(): + if lk.startswith("count_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val) + counts[lk] = val + + for lk in logging_outputs[0].keys(): + if lk.startswith("loss_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val / sample_size / math.log(2), round=3) + elif lk.startswith("correct_"): + val = sum(log[lk] for log in logging_outputs) + metrics.log_scalar(lk, val / counts[re.sub("correct", "count", lk)]) + + if "dec_loss" in logging_outputs[0]: + dec_loss_sum = sum(log.get("dec_loss", 0) for log in logging_outputs) + dec_nll_loss_sum = sum(log.get("dec_nll_loss", 0) for log in logging_outputs) + dec_sample_size = sum(log.get("dec_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "dec_loss", dec_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_scalar( + "dec_nll_loss", dec_nll_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_derived( + "dec_ppl", lambda meters: utils.get_perplexity(meters["dec_nll_loss"].avg) + ) + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("dec_n_correct", n_correct) + metrics.log_derived( + "dec_accuracy", + lambda meters: round( + meters["dec_n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + ) + + if "text_dec_loss" in logging_outputs[0]: + text_dec_loss_sum = sum(log.get("text_dec_loss", 0) for log in logging_outputs) + text_dec_nll_loss_sum = sum(log.get("text_dec_nll_loss", 0) for log in logging_outputs) + text_sample_size = sum(log.get("text_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "text_dec_loss", text_dec_loss_sum / text_sample_size / math.log(2), text_sample_size, round=3 + ) + metrics.log_scalar( + "text_dec_nll_loss", text_dec_nll_loss_sum / text_sample_size / math.log(2), text_sample_size, round=3 + ) + metrics.log_derived( + "text_dec_ppl", lambda meters: utils.get_perplexity(meters["text_dec_nll_loss"].avg) + ) + text_total = utils.item(sum(log.get("text_total", 0) for log in logging_outputs)) + if text_total > 0: + metrics.log_scalar("text_total", text_total) + text_n_correct = utils.item( + sum(log.get("text_dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("text_dec_n_correct", text_n_correct) + metrics.log_derived( + "text_dec_accuracy", + lambda meters: round( + meters["text_dec_n_correct"].sum * 100.0 
/ meters["text_total"].sum, 3 + ) + if meters["text_total"].sum > 0 + else float("nan"), + ) + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + raise NotImplementedError() + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return False diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/criterions/joint_step1_split_batch_criterion.py b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/joint_step1_split_batch_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..8d603e030dcbc7d50144d502ae7bf266365ea154 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/joint_step1_split_batch_criterion.py @@ -0,0 +1,370 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +import math +import re +from dataclasses import dataclass, field +from typing import List, Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.dataclass import FairseqDataclass + +logger = logging.getLogger(__name__) +@dataclass +class JointCriterionConfig(FairseqDataclass): + pred_masked_weight: float = field( + default=1.0, + metadata={"help": "weight for predictive loss for masked frames"}, + ) + pred_nomask_weight: float = field( + default=0.0, + metadata={"help": "weight for predictive loss for unmasked frames"}, + ) + loss_weights: Optional[List[float]] = field( + default=None, + metadata={"help": "weights for additional loss terms (not first one)"}, + ) + log_keys: List[str] = field( + default_factory=lambda: [], + metadata={"help": "output keys to log"}, + ) + dec_weight: float = field( + default=1.0, + metadata={"help": "weights for decoder CE Loss, loss will be (hubert_loss + dec_weight * CE_Loss)"}, + ) + text_weight: float = field( + default=1.0, + metadata={"help": "weights for text ED CE Loss, loss will be (hubert_loss + dec_weight * CE_Loss + text_weight * CE_Loss)"}, + ) + report_accuracy: bool = field( + default=True, + metadata={"help": "report decoder accuracy metric"}, + ) + ignore_prefix_size: int = field( + default=0, + metadata={"help": "Ignore first N tokens"}, + ) + label_smoothing: float = field( + default=0.0, + metadata={"help": "epsilon for label smoothing, 0 means no label smoothing"}, + ) + + +@register_criterion("joint_step1_split_batch", dataclass=JointCriterionConfig) +class JointSplitCriterion(FairseqCriterion): + def __init__( + self, + task, + pred_masked_weight, + pred_nomask_weight, + loss_weights=None, + log_keys=None, + dec_weight=1.0, + text_weight=1.0, + report_accuracy=False, + ignore_prefix_size=0, + label_smoothing=0.0 + ): + super().__init__(task) + self.pred_masked_weight = pred_masked_weight + self.pred_nomask_weight = 
pred_nomask_weight + self.loss_weights = loss_weights + self.log_keys = [] if log_keys is None else log_keys + self.dec_weight = dec_weight + self.text_weight = text_weight + self.report_accuracy = report_accuracy + self.ignore_prefix_size = ignore_prefix_size + self.eps = label_smoothing + self.padding_idx = task.dictionaries[0].pad() + self.text_dict = task.text_dictionary + + def forward(self, model, sample, reduce=True, log_pred=False): + """Compute the loss for the given sample. + Returns a tuple with three elements: + 1) the loss + 2) the sample size, which is used as the denominator for the gradient + 3) logging outputs to display while training + """ + text_type = [name for name in sample.keys() if name.startswith("text")] + loss = 0. + sample_size = 0 + logging_output = {} + reduction = "sum" if reduce else "none" + + if "speech" in sample.keys(): + assert len(text_type) == 0 + sample = sample["speech"] + sample["modality"] = "speech" + + ### 1. do hubert forward and loss computation + net_output = model(target_list=sample["target_list"], **sample["net_input"]) + loss_m_list = [] + logp_m_list = model.get_logits(net_output, True) + targ_m_list = model.get_targets(net_output, True) + assert self.pred_masked_weight == 0 or len(logp_m_list) > 0 + for i, (logp_m, targ_m) in enumerate(zip(logp_m_list, targ_m_list)): + loss_m = F.cross_entropy(logp_m, targ_m, reduction=reduction) + loss_m_list.append(loss_m) + logging_output[f"loss_m_{i}"] = loss_m.detach().item() / targ_m_list[0].numel() + if self.pred_masked_weight > 0: + loss += self.pred_masked_weight * sum(loss_m_list) + sample_size += targ_m_list[0].numel() + + loss_u_list = [] + logp_u_list = model.get_logits(net_output, False) + targ_u_list = model.get_targets(net_output, False) + assert self.pred_nomask_weight == 0 or len(logp_u_list) > 0 + for i, (logp_u, targ_u) in enumerate(zip(logp_u_list, targ_u_list)): + loss_u = F.cross_entropy(logp_u, targ_u, reduction=reduction) + loss_u_list.append(loss_u) + logging_output[f"loss_u_{i}"] = loss_u.detach().item() / targ_m_list[0].numel() + if self.pred_nomask_weight > 0: + loss += self.pred_nomask_weight * sum(loss_u_list) + sample_size += targ_u_list[0].numel() + + if self.loss_weights is not None: + assert hasattr(model, "get_extra_losses") + extra_losses, names = model.get_extra_losses(net_output) + if torch.is_tensor(extra_losses): + extra_losses = [extra_losses] + names = [names] + if len(self.loss_weights) == 1 and len(extra_losses) != 1: + self.loss_weights = [self.loss_weights[0]] * len(extra_losses) + assert len(extra_losses) == len(self.loss_weights), f"{len(extra_losses)}, {len(self.loss_weights)}" + for p, n, coef in zip(extra_losses, names, self.loss_weights): + if coef != 0 and p is not None: + p = coef * p.float() * sample_size + loss += p + logging_output[f"loss_{n}"] = p.item() / sample_size + + if "decoder_target" in sample: + dec_sample_size = sample["dec_ntokens"] + dec_loss, dec_nll_loss = self.compute_ce_loss(model, net_output["decoder_out"], sample, reduce=reduce) + loss = loss + (self.dec_weight * dec_loss * sample_size / dec_sample_size) + logging_output["dec_loss"] = dec_loss.item() + logging_output["dec_nll_loss"] = dec_nll_loss.item() + logging_output["dec_sample_size"] = dec_sample_size + logging_output["hubert_sample_size"] = sample_size + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output["decoder_out"], sample) + logging_output["dec_n_correct"] = utils.item(n_correct.data) + logging_output["total"] = 
utils.item(total.data) + + loss = loss / sample_size + + for lk in self.log_keys: + if lk in net_output: + logging_output[lk] = float((net_output[lk])) + + def compute_correct(logits): + if logits.numel() == 0: + return 0, 0 + else: + assert logits.dim() > 1, logits.shape + max = logits.argmax(-1) == 0 + min = logits.argmin(-1) == 0 + both = max & min + corr = max.long().sum().item() - both.long().sum().item() + count = max.numel() + return corr, count + + with torch.no_grad(): + for i, logp_m in enumerate(logp_m_list): + corr_m, count_m = compute_correct(logp_m) + logging_output[f"correct_m_{i}"] = corr_m + logging_output[f"count_m_{i}"] = count_m + + for i, logp_u in enumerate(logp_u_list): + corr_u, count_u = compute_correct(logp_u) + logging_output[f"correct_u_{i}"] = corr_u + logging_output[f"count_u_{i}"] = count_u + logging_output["speech_sample_size"] = sample_size + + else: + assert len(text_type) == 1 + text_type = text_type[0] + text_sample = sample[text_type] + text_sample["modality"] = "text" + ### 2. do text forward and loss computation + text_net_output = model(**text_sample["net_input"]) + text_dec_loss, text_dec_nll_loss = self.compute_ce_loss(model, text_net_output["decoder_out"], text_sample, reduce=reduce) + text_sample_size = text_sample["ntokens"] + loss = loss + (self.text_weight * text_dec_loss) + logging_output["text_dec_loss"] = text_dec_loss.item() + logging_output["text_dec_nll_loss"] = text_dec_nll_loss.item() + logging_output["text_sample_size"] = text_sample_size + + loss = loss / text_sample_size + sample_size = text_sample_size + sample = text_sample + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, text_net_output["decoder_out"], text_sample) + logging_output["text_dec_n_correct"] = utils.item(n_correct.data) + logging_output["text_total"] = utils.item(total.data) + + logging_output = { + "loss": loss.item() if reduce else loss, + "ntokens": sample_size, + "nsentences": sample["id"].numel(), + "sample_size": 1, + **logging_output, + } + + return loss, 1, logging_output + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.padding_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.padding_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = model.get_normalized_probs(net_output, log_probs=True) + if sample["modality"] == "speech": + target = sample["decoder_target"] + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + else: + target = sample["target"] + + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training (copied from normal cross entropy).""" + loss_sum = sum(log.get("loss", 0) for log in logging_outputs) + 
ntokens = sum(log.get("ntokens", 0) for log in logging_outputs) + sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) + speech_sample_size = sum(log.get("speech_sample_size", 0) for log in logging_outputs) + + metrics.log_scalar("loss", loss_sum / sample_size / math.log(2), sample_size, round=3) + if sample_size != ntokens: + metrics.log_scalar("nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3) + metrics.log_derived("ppl", lambda meters: utils.get_perplexity(meters["nll_loss"].avg)) + else: + metrics.log_derived("ppl", lambda meters: utils.get_perplexity(meters["loss"].avg)) + + counts = {} + log_keys = [] + for log in logging_outputs: + log_keys += list(log.keys()) + log_keys = set(log_keys) + + for lk in log_keys: + if lk.startswith("count_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val) + counts[lk] = val + + for lk in log_keys: + if lk.startswith("loss_") and speech_sample_size > 0: + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / speech_sample_size / math.log(2), round=3) + elif lk.startswith("correct_"): + val = sum(log.get(lk, 0) for log in logging_outputs) + metrics.log_scalar(lk, val / counts[re.sub("correct", "count", lk)]) + + if "dec_loss" in logging_outputs[0]: + dec_loss_sum = sum(log.get("dec_loss", 0) for log in logging_outputs) + dec_nll_loss_sum = sum(log.get("dec_nll_loss", 0) for log in logging_outputs) + dec_sample_size = sum(log.get("dec_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "dec_loss", dec_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_scalar( + "dec_nll_loss", dec_nll_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_derived( + "dec_ppl", lambda meters: utils.get_perplexity(meters["dec_nll_loss"].avg) + ) + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("dec_n_correct", n_correct) + metrics.log_derived( + "dec_accuracy", + lambda meters: round( + meters["dec_n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + ) + + # if "text_dec_loss" in logging_outputs[0]: + if any("text_dec_loss" in logging_output for logging_output in logging_outputs): + text_dec_loss_sum = sum(log.get("text_dec_loss", 0) for log in logging_outputs) + text_dec_nll_loss_sum = sum(log.get("text_dec_nll_loss", 0) for log in logging_outputs) + text_sample_size = sum(log.get("text_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "text_dec_loss", text_dec_loss_sum / text_sample_size / math.log(2), text_sample_size, round=3 + ) + metrics.log_scalar( + "text_dec_nll_loss", text_dec_nll_loss_sum / text_sample_size / math.log(2), text_sample_size, round=3 + ) + metrics.log_derived( + "text_dec_ppl", lambda meters: utils.get_perplexity(meters["text_dec_nll_loss"].avg) + ) + text_total = utils.item(sum(log.get("text_total", 0) for log in logging_outputs)) + if text_total > 0: + metrics.log_scalar("text_total", text_total) + text_n_correct = utils.item( + sum(log.get("text_dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("text_dec_n_correct", text_n_correct) + metrics.log_derived( + "text_dec_accuracy", + lambda meters: round( + meters["text_dec_n_correct"].sum * 100.0 / meters["text_total"].sum, 3 + ) + if 
meters["text_total"].sum > 0 + else float("nan"), + ) + + @staticmethod + def aggregate_logging_outputs(logging_outputs): + """Aggregate logging outputs from data parallel training.""" + raise NotImplementedError() + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return False diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/criterions/joint_step2_criterion.py b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/joint_step2_criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..aaafdbe1fd0cfc9826ed849bd94e2189a517014a --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/criterions/joint_step2_criterion.py @@ -0,0 +1,424 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import math +from argparse import Namespace +from dataclasses import dataclass, field +from omegaconf import II +from typing import Optional + +import torch +import torch.nn.functional as F +from fairseq import metrics, utils +from fairseq.criterions import FairseqCriterion, register_criterion +from fairseq.criterions.label_smoothed_cross_entropy import label_smoothed_nll_loss +from fairseq.data.data_utils import post_process +from fairseq.tasks import FairseqTask +from fairseq.logging.meters import safe_round + +from yitrans_iwslt22.criterions.ctc_ce import CtcCeCriterionConfig + +@dataclass +class JointStep2CriterionConfig(CtcCeCriterionConfig): + pass + + +@register_criterion("joint_step2", dataclass=JointStep2CriterionConfig) +class JointStep2Criterion(FairseqCriterion): + def __init__(self, cfg: JointStep2CriterionConfig, task: FairseqTask): + super().__init__(task) + self.blank_idx = ( + task.target_dictionary.index(task.blank_symbol) + if hasattr(task, "blank_symbol") + else 0 + ) + self.pad_idx = task.target_dictionary.pad() + self.eos_idx = task.target_dictionary.eos() + self.post_process = cfg.post_process + + if cfg.wer_args is not None: + ( + cfg.wer_kenlm_model, + cfg.wer_lexicon, + cfg.wer_lm_weight, + cfg.wer_word_score, + ) = eval(cfg.wer_args) + + if cfg.wer_kenlm_model is not None: + from examples.speech_recognition.w2l_decoder import W2lKenLMDecoder + + dec_args = Namespace() + dec_args.nbest = 1 + dec_args.criterion = "ctc" + dec_args.kenlm_model = cfg.wer_kenlm_model + dec_args.lexicon = cfg.wer_lexicon + dec_args.beam = 50 + dec_args.beam_size_token = min(50, len(task.target_dictionary)) + dec_args.beam_threshold = min(50, len(task.target_dictionary)) + dec_args.lm_weight = cfg.wer_lm_weight + dec_args.word_score = cfg.wer_word_score + dec_args.unk_weight = -math.inf + dec_args.sil_weight = 0 + + self.w2l_decoder = W2lKenLMDecoder(dec_args, task.target_dictionary) + else: + self.w2l_decoder = None + + self.zero_infinity = cfg.zero_infinity + self.sentence_avg = cfg.sentence_avg + + self.dec_weight = cfg.dec_weight + self.report_accuracy = cfg.report_accuracy + self.ignore_prefix_size = cfg.ignore_prefix_size + self.eps = cfg.label_smoothing + + 
def forward(self, model, sample, reduce=True): + text_type = [name for name in sample.keys() if name.startswith("text")] + logging_output = {} + if "speech" in sample.keys(): + assert len(text_type) == 0 + sample = sample["speech"] + sample["modality"] = "speech" + + net_output = model(**sample["net_input"]) + lprobs = model.get_normalized_probs( + net_output, log_probs=True + ).contiguous() # (T, B, C) from the encoder + + if "src_lengths" in sample["net_input"]: + input_lengths = sample["net_input"]["src_lengths"] + else: + if net_output["padding_mask"] is not None: + non_padding_mask = ~net_output["padding_mask"] + input_lengths = non_padding_mask.long().sum(-1) + else: + input_lengths = lprobs.new_full( + (lprobs.size(1),), lprobs.size(0), dtype=torch.long + ) + + pad_mask = (sample["target"] != self.pad_idx) & ( + sample["target"] != self.eos_idx + ) + targets_flat = sample["target"].masked_select(pad_mask) + if "target_lengths" in sample: + target_lengths = sample["target_lengths"] + else: + target_lengths = pad_mask.sum(-1) + + with torch.backends.cudnn.flags(enabled=False): + loss = F.ctc_loss( + lprobs, + targets_flat, + input_lengths, + target_lengths, + blank=self.blank_idx, + reduction="sum", + zero_infinity=self.zero_infinity, + ) + + ntokens = ( + sample["ntokens"] if "ntokens" in sample else target_lengths.sum().item() + ) + + sample_size = sample["target"].size(0) if self.sentence_avg else ntokens + + if "decoder_target" in sample: + if net_output["decoder_out"] is not None: + dec_sample_size = sample["target"].size(0) if self.sentence_avg else sample["dec_ntokens"] + dec_loss, dec_nll_loss = self.compute_ce_loss(model, net_output["decoder_out"], sample, reduce=reduce) + logging_output["ctc_loss"] = loss.item() + loss = (1 - self.dec_weight) * loss + (self.dec_weight * dec_loss * sample_size / dec_sample_size) + logging_output["dec_loss"] = dec_loss.item() + logging_output["dec_nll_loss"] = dec_nll_loss.item() + logging_output["dec_sample_size"] = dec_sample_size + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, net_output["decoder_out"], sample) + logging_output["dec_n_correct"] = utils.item(n_correct.data) + logging_output["total"] = utils.item(total.data) + else: + logging_output["ctc_loss"] = loss.item() + loss = (1 - self.dec_weight) * loss + logging_output["dec_loss"] = 0 + logging_output["dec_nll_loss"] = 0 + logging_output["dec_sample_size"] = 1 + if self.report_accuracy: + logging_output["dec_n_correct"] = 0 + logging_output["total"] = 1 + loss = loss / sample_size + logging_output["speech_sample_size"] = sample_size + else: + assert len(text_type) == 1 + text_type = text_type[0] + text_sample = sample[text_type] + text_sample["modality"] = "text" + ### 2. 
do text forward and loss computation + text_net_output = model(**text_sample["net_input"]) + text_dec_loss, text_dec_nll_loss = self.compute_ce_loss(model, text_net_output["decoder_out"], text_sample, reduce=reduce) + text_sample_size = text_sample["target"].size(0) if self.sentence_avg else text_sample["ntokens"] + loss = text_dec_loss + logging_output["text_dec_loss"] = text_dec_loss.item() + logging_output["text_dec_nll_loss"] = text_dec_nll_loss.item() + logging_output["text_sample_size"] = text_sample_size + + loss = loss / text_sample_size + sample = text_sample + ntokens = text_sample["ntokens"] + + if self.report_accuracy: + n_correct, total = self.compute_accuracy(model, text_net_output["decoder_out"], text_sample) + logging_output["text_dec_n_correct"] = utils.item(n_correct.data) + logging_output["text_total"] = utils.item(total.data) + + logging_output = { + "loss": utils.item(loss.data), # * sample['ntokens'], + "ntokens": ntokens, + "nsentences": sample["id"].numel(), + "sample_size": 1, + **logging_output, + } + + if not model.training and self.dec_weight < 1.0 and "speech" in sample.keys(): + import editdistance + + with torch.no_grad(): + lprobs_t = lprobs.transpose(0, 1).float().contiguous().cpu() + + c_err = 0 + c_len = 0 + w_errs = 0 + w_len = 0 + wv_errs = 0 + for lp, t, inp_l in zip( + lprobs_t, + sample["target_label"] + if "target_label" in sample + else sample["target"], + input_lengths, + ): + lp = lp[:inp_l].unsqueeze(0) + + decoded = None + if self.w2l_decoder is not None: + decoded = self.w2l_decoder.decode(lp) + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + if len(decoded) < 1: + decoded = None + else: + decoded = decoded[0] + + p = (t != self.task.target_dictionary.pad()) & ( + t != self.task.target_dictionary.eos() + ) + targ = t[p] + targ_units = self.task.target_dictionary.string(targ) + targ_units_arr = targ.tolist() + + toks = lp.argmax(dim=-1).unique_consecutive() + pred_units_arr = toks[toks != self.blank_idx].tolist() + + c_err += editdistance.eval(pred_units_arr, targ_units_arr) + c_len += len(targ_units_arr) + + targ_words = post_process(targ_units, self.post_process).split() + + pred_units = self.task.target_dictionary.string(pred_units_arr) + pred_words_raw = post_process(pred_units, self.post_process).split() + + if decoded is not None and "words" in decoded: + pred_words = decoded["words"] + w_errs += editdistance.eval(pred_words, targ_words) + wv_errs += editdistance.eval(pred_words_raw, targ_words) + else: + dist = editdistance.eval(pred_words_raw, targ_words) + w_errs += dist + wv_errs += dist + + w_len += len(targ_words) + + logging_output["wv_errors"] = wv_errs + logging_output["w_errors"] = w_errs + logging_output["w_total"] = w_len + logging_output["c_errors"] = c_err + logging_output["c_total"] = c_len + + return loss, 1, logging_output + + def compute_ce_loss(self, model, net_output, sample, reduce=True): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + loss, nll_loss = label_smoothed_nll_loss( + lprobs, + target, + self.eps, + ignore_index=self.pad_idx, + reduce=reduce, + ) + return loss, nll_loss + + def compute_accuracy(self, model, net_output, sample): + lprobs, target = self.get_lprobs_and_target(model, net_output, sample) + mask = target.ne(self.pad_idx) + n_correct = torch.sum( + lprobs.argmax(1).masked_select(mask).eq(target.masked_select(mask)) + ) + total = torch.sum(mask) + return n_correct, total + + def get_lprobs_and_target(self, model, net_output, sample): + lprobs = 
model.get_normalized_probs(net_output, log_probs=True) + if sample["modality"] == "speech": + target = sample["decoder_target"] + if self.ignore_prefix_size > 0: + if getattr(lprobs, "batch_first", False): + lprobs = lprobs[:, self.ignore_prefix_size :, :].contiguous() + target = target[:, self.ignore_prefix_size :].contiguous() + else: + lprobs = lprobs[self.ignore_prefix_size :, :, :].contiguous() + target = target[self.ignore_prefix_size :, :].contiguous() + else: + target = sample["target"] + + return lprobs.view(-1, lprobs.size(-1)), target.view(-1) + + + @staticmethod + def reduce_metrics(logging_outputs) -> None: + """Aggregate logging outputs from data parallel training.""" + + loss_sum = utils.item(sum(log.get("loss", 0) for log in logging_outputs)) + ntokens = utils.item(sum(log.get("ntokens", 0) for log in logging_outputs)) + nsentences = utils.item( + sum(log.get("nsentences", 0) for log in logging_outputs) + ) + sample_size = utils.item( + sum(log.get("sample_size", 0) for log in logging_outputs) + ) + + metrics.log_scalar( + "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar("ntokens", ntokens) + metrics.log_scalar("nsentences", nsentences) + if sample_size != ntokens: + metrics.log_scalar( + "nll_loss", loss_sum / ntokens / math.log(2), ntokens, round=3 + ) + + c_errors = sum(log.get("c_errors", 0) for log in logging_outputs) + metrics.log_scalar("_c_errors", c_errors) + c_total = sum(log.get("c_total", 0) for log in logging_outputs) + metrics.log_scalar("_c_total", c_total) + w_errors = sum(log.get("w_errors", 0) for log in logging_outputs) + metrics.log_scalar("_w_errors", w_errors) + wv_errors = sum(log.get("wv_errors", 0) for log in logging_outputs) + metrics.log_scalar("_wv_errors", wv_errors) + w_total = sum(log.get("w_total", 0) for log in logging_outputs) + metrics.log_scalar("_w_total", w_total) + + if c_total > 0: + metrics.log_derived( + "uer", + lambda meters: safe_round( + meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3 + ) + if meters["_c_total"].sum > 0 + else float("nan"), + ) + if w_total > 0: + metrics.log_derived( + "wer", + lambda meters: safe_round( + meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + metrics.log_derived( + "raw_wer", + lambda meters: safe_round( + meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3 + ) + if meters["_w_total"].sum > 0 + else float("nan"), + ) + + if "dec_loss" in logging_outputs[0]: + ctc_loss_sum = sum(log.get("ctc_loss", 0) for log in logging_outputs) + dec_loss_sum = sum(log.get("dec_loss", 0) for log in logging_outputs) + dec_nll_loss_sum = sum(log.get("dec_nll_loss", 0) for log in logging_outputs) + dec_sample_size = sum(log.get("dec_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "dec_loss", dec_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_scalar( + "ctc_loss", ctc_loss_sum / sample_size / math.log(2), sample_size, round=3 + ) + metrics.log_scalar( + "dec_nll_loss", dec_nll_loss_sum / dec_sample_size / math.log(2), dec_sample_size, round=3 + ) + metrics.log_derived( + "dec_ppl", lambda meters: utils.get_perplexity(meters["dec_nll_loss"].avg) + ) + total = utils.item(sum(log.get("total", 0) for log in logging_outputs)) + if total > 0: + metrics.log_scalar("total", total) + n_correct = utils.item( + sum(log.get("dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("dec_n_correct", n_correct) + 
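+            # dec_accuracy is derived lazily from the summed counters so it reflects
+            # all workers: dec_n_correct / total as a percentage, or NaN when no
+            # target tokens were counted.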
metrics.log_derived( + "dec_accuracy", + lambda meters: round( + meters["dec_n_correct"].sum * 100.0 / meters["total"].sum, 3 + ) + if meters["total"].sum > 0 + else float("nan"), + ) + + # if "text_dec_loss" in logging_outputs[0]: + if any("text_dec_loss" in logging_output for logging_output in logging_outputs): + text_dec_loss_sum = sum(log.get("text_dec_loss", 0) for log in logging_outputs) + text_dec_nll_loss_sum = sum(log.get("text_dec_nll_loss", 0) for log in logging_outputs) + text_sample_size = sum(log.get("text_sample_size", 0) for log in logging_outputs) + metrics.log_scalar( + "text_dec_loss", text_dec_loss_sum / text_sample_size / math.log(2), text_sample_size, round=3 + ) + metrics.log_scalar( + "text_dec_nll_loss", text_dec_nll_loss_sum / text_sample_size / math.log(2), text_sample_size, round=3 + ) + metrics.log_derived( + "text_dec_ppl", lambda meters: utils.get_perplexity(meters["text_dec_nll_loss"].avg) + ) + text_total = utils.item(sum(log.get("text_total", 0) for log in logging_outputs)) + if text_total > 0: + metrics.log_scalar("text_total", text_total) + text_n_correct = utils.item( + sum(log.get("text_dec_n_correct", 0) for log in logging_outputs) + ) + metrics.log_scalar("text_dec_n_correct", text_n_correct) + metrics.log_derived( + "text_dec_accuracy", + lambda meters: round( + meters["text_dec_n_correct"].sum * 100.0 / meters["text_total"].sum, 3 + ) + if meters["text_total"].sum > 0 + else float("nan"), + ) + + @staticmethod + def logging_outputs_can_be_summed() -> bool: + """ + Whether the logging outputs returned by `forward` can be summed + across workers prior to calling `reduce_metrics`. Setting this + to True will improves distributed training speed. + """ + return False diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/data/concat_dataset.py b/SpeechT5/YiTrans/yitrans_iwslt22/data/concat_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9cdb5231d7cc6e701b99f5490d3406fad139c20f --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/data/concat_dataset.py @@ -0,0 +1,124 @@ +# modalified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/data/concat_dataset.py + +import bisect + +import numpy as np +from torch.utils.data.dataloader import default_collate + +from fairseq.data import FairseqDataset + + +class ConcatDataset(FairseqDataset): + @staticmethod + def cumsum(sequence, sample_ratios): + r, s = [], 0 + for e, ratio in zip(sequence, sample_ratios): + curr_len = int(ratio * len(e)) + r.append(curr_len + s) + s += curr_len + return r + + def __init__(self, datasets, sample_ratios=1): + super(ConcatDataset, self).__init__() + assert len(datasets) > 0, "datasets should not be an empty iterable" + self.datasets = list(datasets) + if isinstance(sample_ratios, int): + sample_ratios = [sample_ratios] * len(self.datasets) + self.sample_ratios = sample_ratios + self.cumulative_sizes = self.cumsum(self.datasets, sample_ratios) + self.real_sizes = [len(d) for d in self.datasets] + + def __len__(self): + return self.cumulative_sizes[-1] + + def __getitem__(self, idx): + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + return self.datasets[dataset_idx][sample_idx] + + def _get_dataset_and_sample_index(self, idx: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + sample_idx = sample_idx % self.real_sizes[dataset_idx] + return dataset_idx, sample_idx + + def collater(self, samples, 
**extra_args): + # For now only supports datasets with same underlying collater implementations + if hasattr(self.datasets[0], "collater"): + return self.datasets[0].collater(samples, **extra_args) + else: + return default_collate(samples, **extra_args) + + def size(self, idx: int): + """ + Return an example's size as a float or tuple. + """ + dataset_idx, sample_idx = self._get_dataset_and_sample_index(idx) + return self.datasets[dataset_idx].size(sample_idx) + + def num_tokens(self, index: int): + return np.max(self.size(index)) + + def attr(self, attr: str, index: int): + dataset_idx = bisect.bisect_right(self.cumulative_sizes, index) + return getattr(self.datasets[dataset_idx], attr, None) + + @property + def sizes(self): + _dataset_sizes = [] + for ds, sr in zip(self.datasets, self.sample_ratios): + if isinstance(ds.sizes, np.ndarray): + _dataset_sizes.append(np.tile(ds.sizes, sr)) + else: + # Only support underlying dataset with single size array. + assert isinstance(ds.sizes, list) + _dataset_sizes.append(np.tile(ds.sizes[0], sr)) + return np.concatenate(_dataset_sizes) + + @property + def supports_prefetch(self): + return all(d.supports_prefetch for d in self.datasets) + + def ordered_indices(self): + """ + Returns indices sorted by length. So less padding is needed. + """ + if isinstance(self.sizes, np.ndarray) and len(self.sizes.shape) > 1: + # special handling for concatenating lang_pair_datasets + if getattr(self.datasets[0], "shuffle", False): + indices = np.random.permutation(len(self)).astype(np.int64) + else: + indices = np.arange(len(self), dtype=np.int64) + sizes = self.sizes + tgt_sizes = ( + sizes[:, 1] if len(sizes.shape) > 0 and sizes.shape[1] > 1 else None + ) + src_sizes = ( + sizes[:, 0] if len(sizes.shape) > 0 and sizes.shape[1] > 1 else sizes + ) + # sort by target length, then source length + if tgt_sizes is not None: + indices = indices[np.argsort(tgt_sizes[indices], kind="mergesort")] + return indices[np.argsort(src_sizes[indices], kind="mergesort")] + else: + return np.argsort(self.sizes) + + def prefetch(self, indices): + frm = 0 + for to, ds in zip(self.cumulative_sizes, self.datasets): + real_size = len(ds) + if getattr(ds, "supports_prefetch", False): + ds.prefetch([(i - frm) % real_size for i in indices if frm <= i < to]) + frm = to + + @property + def can_reuse_epoch_itr_across_epochs(self): + return all(d.can_reuse_epoch_itr_across_epochs for d in self.datasets) + + def set_epoch(self, epoch): + super().set_epoch(epoch) + for ds in self.datasets: + if hasattr(ds, "set_epoch"): + ds.set_epoch(epoch) diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/data/denoising_dataset.py b/SpeechT5/YiTrans/yitrans_iwslt22/data/denoising_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..f49870600047683fa7b9e37bc50a86bf0c87be53 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/data/denoising_dataset.py @@ -0,0 +1,90 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import math + +import numpy as np +import torch + +from fairseq.data import FairseqDataset, data_utils, DenoisingDataset + + +class 
DenoisingDatasetLang(DenoisingDataset): + """ + A wrapper around DenoisingDataset for BART dataset. + + """ + + def __init__( + self, + dataset, + sizes, + vocab, + mask_idx, + mask_whole_words, + shuffle, + seed, + args, + eos=None, + item_transform_func=None, + tgt_lang_idx=None, + ): + super().__init__( + dataset, + sizes, + vocab, + mask_idx, + mask_whole_words, + shuffle, + seed, + args, + eos, + item_transform_func, + ) + + self.tgt_lang_idx=tgt_lang_idx + + def __getitem__(self, index): + with data_utils.numpy_seed(self.seed, self.epoch, index): + tokens = self.dataset[index] + assert tokens[-1] == self.eos + source, target = tokens, tokens.clone() + + if self.permute_sentence_ratio > 0.0: + source = self.permute_sentences(source, self.permute_sentence_ratio) + + if self.mask_ratio > 0: + source = self.add_whole_word_mask(source, self.mask_ratio) + + if self.insert_ratio > 0: + source = self.add_insertion_noise(source, self.insert_ratio) + + if self.rotate_ratio > 0.0 and np.random.random() < self.rotate_ratio: + source = self.add_rolling_noise(source) + # there can additional changes to make: + if self.item_transform_func is not None: + source, target = self.item_transform_func(source, target) + + assert (source >= 0).all() + assert (source[1:-1] >= 1).all() + assert (source <= len(self.vocab)).all() + assert source[0] == self.vocab.bos() + assert target[0] == self.vocab.bos() + assert source[-1] == self.eos + + if self.tgt_lang_idx is not None: + tgt_lang_idx = torch.LongTensor([self.tgt_lang_idx]) + source = torch.cat([source[1:], tgt_lang_idx]) + target = torch.cat([target[1:], tgt_lang_idx]) + sample = { + "id": index, + "source": source, + "target": target, + } + return sample diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/data/lang_pair_mask_dataset.py b/SpeechT5/YiTrans/yitrans_iwslt22/data/lang_pair_mask_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..e5617a23150a2268cd1ba36b9b7fed4c5e7b3d09 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/data/lang_pair_mask_dataset.py @@ -0,0 +1,62 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/data/audio/multi_modality_dataset.py +""" + + +from typing import Optional + +import numpy as np +import torch +from fairseq.data import ( + LanguagePairDataset, +) +from fairseq.data.audio.multi_modality_dataset import LangPairMaskDataset as FairseqLangPairMaskDataset + +class LangPairMaskDataset(FairseqLangPairMaskDataset): + def __init__( + self, + dataset: LanguagePairDataset, + src_eos: int, + src_bos: Optional[int] = None, + noise_id: Optional[int] = -1, + mask_ratio: Optional[float] = 0, + mask_type: Optional[str] = "random", + ): + super.__init__( + dataset, + src_eos, + src_bos, + noise_id, + mask_ratio, + mask_type, + ) + def mask_src_tokens(self, sample): + src_item = sample["source"] + mask = None + if self.mask_type == "random": + mask = torch.rand(len(src_item)).le(self.mask_ratio) + else: + mask = torch.ones(len(src_item)) + mask[: int(len(src_item) * (1 - self.mask_ratio))] = 0 + 
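+        # Turn the 0/1 mask into a boolean mask, keep BOS/EOS positions unmasked,
+        # and replace the selected source tokens with noise_id.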
mask = mask.eq(1) + if src_item[0] == self.src_bos: + mask[0] = False + if src_item[-1] == self.src_eos: + mask[-1] = False + mask_src_item = src_item.masked_fill(mask, self.noise_id) + smp = sample + smp["source"] = mask_src_item + return smp + + def collater(self, samples, pad_to_length=None): + return self.dataset.collater(samples, pad_to_length=pad_to_length) + diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/data/load_langpair_dataset.py b/SpeechT5/YiTrans/yitrans_iwslt22/data/load_langpair_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..62c5e7b7789deece849df07b560d775c5c84d5c0 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/data/load_langpair_dataset.py @@ -0,0 +1,170 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/tasks/translation.py + Add custom lang_format in function load_langpair_dataset +""" + +import itertools +import logging +import os + +from fairseq.data import ( + AppendTokenDataset, + LanguagePairDataset, + PrependTokenDataset, + StripTokenDataset, + TruncateDataset, + data_utils, + indexed_dataset, +) + +from yitrans_iwslt22.data.concat_dataset import ConcatDataset + + +EVAL_BLEU_ORDER = 4 + + +logger = logging.getLogger(__name__) + + +def load_langpair_dataset( + data_path, + split, + src, + src_dict, + tgt, + tgt_dict, + combine, + dataset_impl, + upsample_primary, + left_pad_source, + left_pad_target, + max_source_positions, + max_target_positions, + prepend_bos=False, + load_alignments=False, + truncate_source=False, + append_source_id=False, + num_buckets=0, + shuffle=True, + pad_to_multiple=1, + prepend_bos_src=None, + lang_format="[{}]", +): + def split_exists(split, src, tgt, lang, data_path): + filename = os.path.join(data_path, "{}.{}-{}.{}".format(split, src, tgt, lang)) + return indexed_dataset.dataset_exists(filename, impl=dataset_impl) + + src_datasets = [] + tgt_datasets = [] + + for k in itertools.count(): + split_k = split + (str(k) if k > 0 else "") + + # infer langcode + if split_exists(split_k, src, tgt, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, src, tgt)) + elif split_exists(split_k, tgt, src, src, data_path): + prefix = os.path.join(data_path, "{}.{}-{}.".format(split_k, tgt, src)) + else: + if k > 0: + break + else: + raise FileNotFoundError( + "Dataset not found: {} ({})".format(split, data_path) + ) + + src_dataset = data_utils.load_indexed_dataset( + prefix + src, src_dict, dataset_impl + ) + if truncate_source: + src_dataset = AppendTokenDataset( + TruncateDataset( + StripTokenDataset(src_dataset, src_dict.eos()), + max_source_positions - 1, + ), + src_dict.eos(), + ) + src_datasets.append(src_dataset) + + tgt_dataset = data_utils.load_indexed_dataset( + prefix + tgt, tgt_dict, dataset_impl + ) + if tgt_dataset is not None: + tgt_datasets.append(tgt_dataset) + + logger.info( + "{} {} {}-{} {} examples".format( + data_path, split_k, src, tgt, len(src_datasets[-1]) + ) + ) + + if not combine: + break + + assert len(src_datasets) == len(tgt_datasets) or 
len(tgt_datasets) == 0 + + if len(src_datasets) == 1: + src_dataset = src_datasets[0] + tgt_dataset = tgt_datasets[0] if len(tgt_datasets) > 0 else None + else: + sample_ratios = [1] * len(src_datasets) + sample_ratios[0] = upsample_primary + src_dataset = ConcatDataset(src_datasets, sample_ratios) + if len(tgt_datasets) > 0: + tgt_dataset = ConcatDataset(tgt_datasets, sample_ratios) + else: + tgt_dataset = None + + if prepend_bos: + assert hasattr(src_dict, "bos_index") and hasattr(tgt_dict, "bos_index") + src_dataset = PrependTokenDataset(src_dataset, src_dict.bos()) + if tgt_dataset is not None: + tgt_dataset = PrependTokenDataset(tgt_dataset, tgt_dict.bos()) + elif prepend_bos_src is not None: + logger.info(f"prepending src bos: {prepend_bos_src}") + src_dataset = PrependTokenDataset(src_dataset, prepend_bos_src) + + eos = None + if append_source_id: + src_dataset = AppendTokenDataset( + src_dataset, src_dict.index(lang_format.format(src)) + ) + if tgt_dataset is not None: + tgt_dataset = AppendTokenDataset( + tgt_dataset, tgt_dict.index(lang_format.format(tgt)) + ) + eos = tgt_dict.index(lang_format.format(tgt)) + + align_dataset = None + if load_alignments: + align_path = os.path.join(data_path, "{}.align.{}-{}".format(split, src, tgt)) + if indexed_dataset.dataset_exists(align_path, impl=dataset_impl): + align_dataset = data_utils.load_indexed_dataset( + align_path, None, dataset_impl + ) + + tgt_dataset_sizes = tgt_dataset.sizes if tgt_dataset is not None else None + return LanguagePairDataset( + src_dataset, + src_dataset.sizes, + src_dict, + tgt_dataset, + tgt_dataset_sizes, + tgt_dict, + left_pad_source=left_pad_source, + left_pad_target=left_pad_target, + align_dataset=align_dataset, + eos=eos, + num_buckets=num_buckets, + shuffle=shuffle, + pad_to_multiple=pad_to_multiple, + ) diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/data/multimodal_corpus_dataset.py b/SpeechT5/YiTrans/yitrans_iwslt22/data/multimodal_corpus_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..ee02a4e9dee21acd200ef038f6be3a241d51479f --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/data/multimodal_corpus_dataset.py @@ -0,0 +1,346 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +from os import replace +import time +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import numpy as np +from fairseq.data import data_utils + +from fairseq.data import FairseqDataset + +logger = logging.getLogger(__name__) + + +class MultiCorpusDataset(FairseqDataset): + """ + see fairseq/fairseq/data/multi_corpus_dataset.__doc__ + + Args: + datasets: a OrderedDict of FairseqDataset instances. 
+ distribution: a List containing the probability of getting an utterance from + corresponding dataset + seed: random seed for sampling the datsets + sort_indices: if true, will sort the ordered indices by size + batch_sample: if true, will ensure each batch is from a single dataset + """ + + def __init__( + self, + datasets: Dict[str, FairseqDataset], + max_positions: Dict, + distribution: List[float], + max_tokens_ratio: List[float], + seed: int, + sort_indices: bool = False, + check_length: bool = False, + ): + super().__init__() + assert isinstance(datasets, OrderedDict) + assert len(datasets) == len(distribution) + # assert sum(distribution) == 1 + self.datasets = datasets + self.distribution = distribution + self.max_tokens_ratio = max_tokens_ratio + self.seed = seed + self.sort_indices = sort_indices + self.max_positions = max_positions + self.check_length = check_length + + # Avoid repeated conversions to list later + self.dataset_list = list(datasets.values()) + self.total_num_instances = 0 + + # first_dataset = self.dataset_list[0] + + self.num_instances_per_dataset = [] + self.dataset_offsets = [] + for i, dataset in enumerate(self.dataset_list): + assert isinstance(dataset, FairseqDataset) + # assert type(dataset) is type(first_dataset) + self.num_instances_per_dataset.append( + 0 if self.distribution[i] == 0 else len(dataset) + ) + self.dataset_offsets.append(self.total_num_instances) + self.total_num_instances += self.num_instances_per_dataset[i] + + def ordered_indices(self): + start = time.time() + with data_utils.numpy_seed(self.seed, self.epoch): + logger.info(f"sampling new dataset with seed {self.seed} epoch {self.epoch}") + sampled_indices = {} + + # For each dataset i, sample self.distribution[i] * self.total_num_instances + for i, key in enumerate(self.datasets): + tp = time.time() + if self.distribution[i] == 0: + # skip dataset if sampling probability is 0 + continue + + if i < len(self.datasets) - 1: + num_instances = int(self.distribution[i] * self.total_num_instances) + high = self.dataset_offsets[i + 1] + else: + num_instances = int(self.distribution[i] * self.total_num_instances) + high = self.total_num_instances + + logger.info(f"sampling {num_instances} from {key} dataset") + + # First, add k copies of the dataset where k = num_instances // len(dataset). + # This ensures an equal distribution of the data points as much as possible. 
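+                # (e.g. num_instances=2500 over a 1000-item corpus gives num_copies=2
+                # full passes plus 500 extra indices drawn from a random permutation)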
+ # For the remaining entries randomly sample them + dataset_size = len(self.datasets[key]) + num_copies = num_instances // dataset_size + dataset_indices = np.random.permutation(high - self.dataset_offsets[i])[: num_instances - num_copies * dataset_size] + if num_copies > 0: + dataset_indices = np.concatenate( + ( + np.repeat( + np.arange(high - self.dataset_offsets[i]), num_copies + ), + dataset_indices, + ) + ) + # filter by size, we should ignore it by setting check_length=False + # , as it is very time-consuming on large dadaset + if self.max_positions[key] is not None and self.check_length: + dataset_indices, ignored = self.datasets[key].filter_indices_by_size( + dataset_indices, + self.max_positions[key], + ) + if len(ignored) > 0: + logger.warning( + ( + "{:,} samples have invalid sizes and will be skipped, " + "max_positions={}, first few sample ids={}" + ).format(len(ignored), self.max_positions[key], ignored[:10]) + ) + + if self.sort_indices: + logger.info(" - sampled indices took {}s".format(time.time() - tp)) + tp = time.time() + dataset_indices = np.sort(dataset_indices) + dataset_indices = self.datasets[key].ordered_indices()[dataset_indices] + self.dataset_offsets[i] + logger.info(" - ordered_indices took {}s".format(time.time() - tp)) + else: + np.random.shuffle(dataset_indices) + + sampled_indices[key] = dataset_indices + + logger.info( + "multi_corpus_dataset ordered_indices took {}s".format( + time.time() - start + ) + ) + return sampled_indices + + def _map_index(self, index: int): + """ + If dataset A has length N and dataset B has length M + then index 1 maps to index 1 of dataset A, and index N + 1 + maps to index 1 of B. + """ + counter = 0 + for num_instances, key in zip(self.num_instances_per_dataset, self.datasets): + if index < counter + num_instances: + return index - counter, key + counter += num_instances + raise ValueError( + "Invalid index: {}, max: {}".format(index, self.total_num_instances) + ) + + def __len__(self): + """ + Length of this dataset is the sum of individual datasets + """ + return self.total_num_instances + + def __getitem__(self, index): + new_index, key = self._map_index(index) + try: + item = self.datasets[key][new_index] + item["full_id"] = index + return item + except Exception as e: + e.args = (f"Error from {key} dataset", *e.args) + raise + + def collater(self, samples): + """ + If we are doing batch sampling, then pick the right collater to use. + + Otherwise we assume all collaters are the same. 
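+        Samples are routed back to their source corpus via their "full_id", collated
+        per corpus, and returned as a dict keyed by corpus name.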
+ """ + if len(samples) == 0: + return None + + samples_dict = {key: [] for key in self.datasets} + for s in samples: + _, key = self._map_index(s["full_id"]) + samples_dict[key].append(s) + + batch = {} + for key in samples_dict: + if len(samples_dict[key]) == 0: + continue + batch[key] = self.datasets[key].collater(samples_dict[key]) + + return batch + + + def num_tokens(self, index: int): + index, key = self._map_index(index) + return self.datasets[key].num_tokens(index) + + def size(self, index: int): + index, key = self._map_index(index) + return self.datasets[key].size(index) + + @property + def can_reuse_epoch_itr_across_epochs(self): + return False + + def set_epoch(self, epoch, **unused): + super().set_epoch(epoch) + logger.info(f"setting epoch of multi_corpus_dataset to {epoch}") + for ds in self.dataset_list: + if hasattr(ds, "set_epoch"): + ds.set_epoch(epoch) + self.epoch = epoch + + @property + def supports_prefetch(self): + return False + + @property + def supports_fetch_outside_dataloader(self): + return all( + self.datasets[key].supports_fetch_outside_dataloader + for key in self.datasets + ) + + + def batch_by_size( + self, + indices, + max_tokens=None, + max_sentences=None, + required_batch_size_multiple=1, + ): + dataset_indices = indices + batches_dict = {} + for n, key in enumerate(dataset_indices): + max_tokens_ratio = self.max_tokens_ratio[n] + cur_batches = super().batch_by_size( + np.array(dataset_indices[key], dtype=np.int64), + round(max_tokens * max_tokens_ratio), + max_sentences, + required_batch_size_multiple, + ) + logger.info(f"Created {len(cur_batches)} batches for dataset {key}") + batches_dict[key] = cur_batches + + return batches_dict + + + def get_batch_sampler( + self, + indices, + num_shards, + seed, + max_tokens=None, + max_sentences=None, + required_batch_size_multiple=1, + split_modality_batch=False, + ): + + def batch_sampler(dataset, epoch): + start = time.time() + batches_dict = dataset.batch_by_size( + indices, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + ) + logger.info(f"multi_corpus_dataset, batch_by_size took {time.time() - start}s") + start = time.time() + new_batches = [] + + ### shuffle inner group size, split into speech/text batches + speech_batches, text_batches = [], [] + for name, batches in batches_dict.items(): + batches = inner_bucket_shuffle(batches, seed+epoch, num_shards*10) + batches = batches[: (len(batches) // num_shards) * num_shards] + if name.startswith("speech"): + speech_batches += batches + else: + text_batches += batches + if len(speech_batches) == 0: + logger.warning(f"Sample 0 speech batch, please ensure that no speech data loaded.") + if len(text_batches) == 0: + logger.warning(f"Sample 0 text batch, please ensure that no text data loaded.") + + ### shuffle groups + if len(speech_batches) == 0 or len(text_batches) == 0: + new_batches = speech_batches + text_batches + new_batches = shuffle_buckets(new_batches, seed=seed+epoch, inner_shuf=False) + else: + speech_batches = shuffle_buckets(speech_batches, seed=seed+epoch, inner_shuf=False) + text_batches = shuffle_buckets(text_batches, seed=seed+epoch, inner_shuf=False) + num_batch = min(len(speech_batches), len(text_batches)) + if split_modality_batch: + for i in range(0, num_batch, num_shards): + new_batches += speech_batches[i: i + num_shards] + new_batches += text_batches[i: i + num_shards] + else: + for i in range(num_batch): + new_batches.append(np.concatenate([speech_batches[i], 
text_batches[i]])) + + logger.info(f"multi_corpus_dataset sample {len(new_batches)} batches, took {time.time() - start}s") + return new_batches + + def inner_bucket_shuffle(batches, seed, bucket_size=10, thr=0): + """we assert batches is sorted form long to short. + shuffle samples in a buctet(e.g. 10 batches). + batches: a list of numpy array""" + num_batch = len(batches) + new_batches = [] + num_buckets = len(batches) // bucket_size + i = 0 + while i < num_batch: + if (i < bucket_size * thr or + i >= bucket_size * (num_buckets - thr) + ): + new_batches.append(batches[i]) + i += 1 + else: + group = np.concatenate(batches[i: i+bucket_size]) + with data_utils.numpy_seed(seed): + np.random.shuffle(group) + new_batches += np.array_split(group, bucket_size) + i += bucket_size + assert all([len(batch) > 0 for batch in new_batches]) + return new_batches + + def shuffle_buckets(batches, seed, inner_shuf=True): + if inner_shuf: + batches = inner_bucket_shuffle(batches, seed, num_shards*10) + batches = [batches[i: i + num_shards] for i in range(0, len(batches)-num_shards+1, num_shards)] + assert len(batches[-1]) == num_shards + new_batches = [] + with data_utils.numpy_seed(seed): + np.random.shuffle(batches) + for group in batches: + new_batches += group + return new_batches + + return batch_sampler diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/data/speech2c_dataset.py b/SpeechT5/YiTrans/yitrans_iwslt22/data/speech2c_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..e75d75a96a1759c79a0a4ba1cc297e2eea4a1aaa --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/data/speech2c_dataset.py @@ -0,0 +1,222 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import itertools +import logging +import os +import sys +from typing import Any, List, Optional, Union + +import numpy as np + +import torch +import torch.nn.functional as F +from fairseq.data import data_utils, Dictionary +from fairseq.data.audio.hubert_dataset import HubertDataset + +logger = logging.getLogger(__name__) + + + +class Speech2cDataset(HubertDataset): + def __init__( + self, + manifest_path: str, + sample_rate: float, + label_paths: List[str], + label_rates: Union[List[float], float], # -1 for sequence labels + pad_list: List[str], + eos_list: List[str], + label_processors: Optional[List[Any]] = None, + max_keep_sample_size: Optional[int] = None, + min_keep_sample_size: Optional[int] = None, + max_sample_size: Optional[int] = None, + shuffle: bool = True, + pad_audio: bool = False, + normalize: bool = False, + store_labels: bool = True, + random_crop: bool = False, + single_target: bool = False, + tgt_dict: Optional[Dictionary] = None, + add_decoder: bool = False, + fine_tuning: bool = False, + tokenizer = None, + tgt_lang_idx: int = None, + mbart_style_lang_id: bool = False, + retry_times: int = 5, + reduce_label_for_dec: bool = True, + ): + super().__init__( + manifest_path, + sample_rate, + label_paths, + label_rates, + pad_list, + eos_list, + label_processors, + max_keep_sample_size, + min_keep_sample_size, + max_sample_size, + shuffle, + pad_audio, + normalize, + 
store_labels, + random_crop, + single_target + ) + self.tgt_dict = tgt_dict + self.add_decoder = add_decoder + self.fine_tuning = fine_tuning + self.tokenizer = tokenizer + self.tgt_lang_idx = tgt_lang_idx + self.mbart_style_lang_id = mbart_style_lang_id + self.retry_times = retry_times + self.reduce_label_for_dec = reduce_label_for_dec + logger.info( + f"tgt_lang_idx={self.tgt_lang_idx}, reduce_label_for_dec={reduce_label_for_dec}, " + f"mbart_style_lang_id={mbart_style_lang_id}" + ) + + self.sizes = np.array(self.sizes) + + def get_label(self, index, label_idx): + if self.store_labels: + label = self.label_list[label_idx][index] + else: + with open(self.label_paths[label_idx]) as f: + offset_s, offset_e = self.label_offsets_list[label_idx][index] + f.seek(offset_s) + label = f.read(offset_e - offset_s) + + if self.tokenizer is not None and self.fine_tuning: + label = self.tokenizer.encode(label) + + if self.label_processors is not None: + label = self.label_processors[label_idx](label) + return label + + def collater(self, samples): + # target = max(sizes) -> random_crop not used + # target = max_sample_size -> random_crop used for long + samples = [s for s in samples if s["source"] is not None] + if len(samples) == 0: + return {} + + audios = [s["source"] for s in samples] + audio_sizes = [len(s) for s in audios] + if self.pad_audio: + audio_size = min(max(audio_sizes), self.max_sample_size) + else: + audio_size = min(min(audio_sizes), self.max_sample_size) + collated_audios, padding_mask, audio_starts = self.collater_audio( + audios, audio_size + ) + + targets_by_label = [ + [s["label_list"][i] for s in samples] for i in range(self.num_labels) + ] + targets_list, lengths_list, ntokens_list = self.collater_label( + targets_by_label, audio_size, audio_starts + ) + + if self.add_decoder: + if self.fine_tuning: + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + else: + if self.tokenizer is not None: + decoder_label = [ + # Set 48 for translate int to char and avoid \n + torch.cat( + ( + torch.tensor( + self.tokenizer.sp.Encode( + "".join( + [chr(j + 48) for j in ( + targets_list[0][i, :lengths_list[0][i]].unique_consecutive() if self.reduce_label_for_dec else targets_list[0][i, :lengths_list[0][i]] + ).tolist()] + ), out_type=int + ) + ), + torch.tensor([self.tgt_dict.eos()]) + ), dim=0 + ).long() + for i in range(targets_list[0].size(0)) + ] + else: + decoder_label = [ + torch.cat((targets_list[0][i, :lengths_list[0][i]].unique_consecutive() if self.reduce_label_for_dec else targets_list[0][i, :lengths_list[0][i]], torch.tensor([self.tgt_dict.eos()])), 0).long() + for i in range(targets_list[0].size(0)) + ] + + if self.mbart_style_lang_id: + decoder_label = [ + torch.cat((decoder_label[i], torch.tensor([self.tgt_lang_idx])), 0).long() + for i in range(targets_list[0].size(0)) + ] + + dec_ntokens = sum(x.size(0) for x in decoder_label) + decoder_target = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos() if not self.mbart_style_lang_id else self.tgt_lang_idx, + left_pad=False, + move_eos_to_beginning=False, + ) + decoder_target_lengths = torch.tensor( + [x.size(0) for x in decoder_label], dtype=torch.long + ) + prev_output_tokens = data_utils.collate_tokens( + decoder_label, + self.tgt_dict.pad(), + self.tgt_dict.eos() if not self.mbart_style_lang_id else self.tgt_lang_idx, + left_pad=False, + move_eos_to_beginning=True, + ) + + if 
self.tgt_lang_idx is not None and not self.mbart_style_lang_id: + assert (prev_output_tokens[:, 0] != self.tgt_dict.eos()).sum() == 0 + prev_output_tokens[:, 0] = self.tgt_lang_idx + + net_input = { + "source": collated_audios, + "padding_mask": padding_mask, + "prev_output_tokens": prev_output_tokens, + } + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + "decoder_target": decoder_target, + "decoder_target_lengths": decoder_target_lengths, + "dec_ntokens": dec_ntokens, + "lang_idx": self.tgt_lang_idx, + } + else: + net_input = {"source": collated_audios, "padding_mask": padding_mask} + batch = { + "id": torch.LongTensor([s["id"] for s in samples]), + "net_input": net_input, + } + + if self.single_target: + batch["target_lengths"] = lengths_list[0] + batch["ntokens"] = ntokens_list[0] + batch["target"] = targets_list[0] + else: + batch["target_lengths_list"] = lengths_list + batch["ntokens_list"] = ntokens_list + batch["target_list"] = targets_list + return batch + + # @property + # def sizes(self): + # return np.array(self.sizes) + diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/models/__init__.py b/SpeechT5/YiTrans/yitrans_iwslt22/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/models/_hubert_mt.py b/SpeechT5/YiTrans/yitrans_iwslt22/models/_hubert_mt.py new file mode 100644 index 0000000000000000000000000000000000000000..d997eb21bdef44ab51ef02ba586b9058c87f371e --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/models/_hubert_mt.py @@ -0,0 +1,310 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
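+# HubertMT wraps a jointly pre-trained speech/text model for text-to-text (MT)
+# fine-tuning: the encoder and decoder are rebuilt from the pre-training checkpoint
+# (optionally initializing the text encoder and decoder from an mBART checkpoint),
+# the speech-specific pre-training modules are dropped, and generation goes
+# through the shared decoder exposed via the `decoder` property.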
+ +import logging +import contextlib +from argparse import Namespace +from typing import Any, Optional + +import torch +import torch.nn as nn +from dataclasses import dataclass, field +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models import BaseFairseqModel, FairseqEncoder, register_model +from fairseq.models.hubert.hubert import MASKING_DISTRIBUTION_CHOICES +from fairseq.tasks import FairseqTask +from omegaconf import II, MISSING + +from .hubert_asr import HubertAsrConfig +from fairseq.models.transformer import TransformerConfig +logger = logging.getLogger(__name__) + + +@dataclass +class HubertMTConfig(HubertAsrConfig): + load_pretrained_mbart_from: Optional[str] = field( + default=None, + metadata={ + "help": "model to take text encoder decoder weights from (for initialization)" + }, + ) + use_rel_pos_enc: bool = field( + default=True, + metadata={"help": "whether to use relative positional encoding"}, + ) + text_transformer_encoder_layers: int = field( + default=12, + metadata={"help": "reset text_transformer_encoder_layers"}, + ) + + +@register_model("hubert_mt", dataclass=HubertMTConfig) +class HubertMT(BaseFairseqModel): + def __init__(self, cfg: HubertMTConfig, w2v_encoder: BaseFairseqModel): + super().__init__() + self.cfg = cfg + self.w2v_encoder = w2v_encoder + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: HubertMTConfig, task: FairseqTask): + """Build a new model instance.""" + w2v_encoder = HubertEncoder(cfg, task.target_dictionary) + return cls(cfg, w2v_encoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + if "decoder_out" in net_output: + return self.w2v_encoder.get_normalized_probs_decoder(net_output["decoder_out"], log_probs, sample) + + assert "encoder_out" not in net_output + if "encoder_out" not in net_output: + return self.w2v_encoder.get_normalized_probs_decoder(net_output, log_probs, sample) + + + def get_logits(self, net_output): + logits = net_output["encoder_out"] + padding = net_output["encoder_padding_mask"] + if padding is not None and padding.any(): + padding = padding.T + logits[padding][..., 0] = 0 + logits[padding][..., 1:] = float("-inf") + + return logits + + def forward(self, **kwargs): + x = self.w2v_encoder(**kwargs) + return x + + @property + def encoder(self): + return self.w2v_encoder + + def reorder_encoder_out(self, encoder_out, new_order): + return self.encoder.reorder_encoder_out(encoder_out, new_order) + + @property + def decoder(self): + return self.w2v_encoder.w2v_model.decoder + + +class HubertEncoder(FairseqEncoder): + def __init__(self, cfg: HubertMTConfig, tgt_dict=None): + self.apply_mask = cfg.apply_mask + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + 
"no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "decoder_layerdrop": cfg.decoder_layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + "decoder_dict_size": -1, + "add_text_modality": True, + "add_text_encoder": True, + "load_pretrained_mbart_from": None, + "load_pretrained_w2v_from": None, + "text_transformer": { + "encoder":{ + "layers": cfg.text_transformer_encoder_layers, + "layerdrop": cfg.layerdrop, + }, + 'dropout': cfg.dropout, + 'attention_dropout': cfg.attention_dropout, + 'activation_dropout': cfg.activation_dropout, + } + } + if cfg.no_pretrained_weights: + arg_overrides["use_rel_pos_enc"] = cfg.use_rel_pos_enc + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu( + cfg.w2v_path, arg_overrides + ) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + # logger.info("---------------------state.keys()-------------------------------------------") + # logger.info(state.keys()) + # logger.info("---------------------w2v_args.task-------------------------------------------") + # logger.info(w2v_args.task) + # logger.info("---------------------w2v_args.model-------------------------------------------") + # logger.info(w2v_args.model) + # logger.info("----------------------------------------------------------------") + + w2v_args.task.data = cfg.data + w2v_args.task.text_cfg.text_data = cfg.data + w2v_args.task.text_cfg.data_config = None + task = tasks.setup_task(w2v_args.task) + + if state is not None and "task_state" in state: + # This will load the stored "dictionaries" object + task.load_state_dict(state["task_state"]) + + model = task.build_model(w2v_args.model) + + + ### load mbart if specificed + if cfg.load_pretrained_mbart_from is not None and cfg.no_pretrained_weights: + logger.info("Loading mbart....") + mbart_model_state = model.load_checkpoint(cfg.load_pretrained_mbart_from) + model.text_encoder = model.load_pretrained_component_from_model( + component=model.text_encoder, state=mbart_model_state + ) + model.decoder = model.load_pretrained_component_from_model( + component=model.decoder, state=mbart_model_state + ) + + if state is not None and not cfg.no_pretrained_weights: + logger.info("Loading pre-trained models....") + model.load_state_dict(state["model"], strict=True) + + ### remove_pretraining_modules model.remove_pretraining_modules() + model.target_glu = None + model.final_proj = None + model.feature_extractor = None + model.post_extract_proj = None + model.encoder = None + + + + dropout_keys = [ n for n in w2v_args.model.text_transformer if n.find("drop") >= 0 ] + for key in dropout_keys: + logger.info(f"{key}: {w2v_args.model.text_transformer[key]}") + + super().__init__(task.source_dictionary) + + d = w2v_args.model.encoder_embed_dim + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.freeze_decoder_updates = cfg.freeze_decoder_updates + self.num_updates = 0 + + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, src_tokens, src_lengths, prev_output_tokens, tbc=True, **kwargs): + + # ft = self.freeze_finetune_updates 
<= self.num_updates + w2v_args = { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + "mask": self.apply_mask and self.training, + "prev_output_tokens": prev_output_tokens, + } + + results = self.w2v_model(**w2v_args) + return results + + def get_normalized_probs_decoder(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (B, T, D) tensor + return self.w2v_model.get_normalized_probs(net_output, log_probs, sample) + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + if isinstance(encoder_out["encoder_out"], list): + encoder_out["encoder_out"] = ( + [] if len(encoder_out["encoder_out"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out"]] + ) + else: + encoder_out["encoder_out"] = encoder_out[ + "encoder_out" + ].index_select(1, new_order) + if encoder_out["encoder_padding_mask"] is not None: + if isinstance(encoder_out["encoder_padding_mask"], list): + encoder_out["encoder_padding_mask"] = ( + [] if len(encoder_out["encoder_padding_mask"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"]] + ) + else: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(0, new_order) + if "decoder_out" in encoder_out and encoder_out["decoder_out"] is not None: + if isinstance(encoder_out["decoder_out"], list): + encoder_out["decoder_out"] = ( + [] if len(encoder_out["decoder_out"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["decoder_out"]] + ) + else: + encoder_out["decoder_out"] = encoder_out[ + "decoder_out" + ].index_select(0, new_order) + if "encoder_out_for_ctc" in encoder_out and encoder_out["encoder_out_for_ctc"] is not None: + if isinstance(encoder_out["encoder_out_for_ctc"], list): + encoder_out["encoder_out_for_ctc"] = ( + [] if len(encoder_out["encoder_out_for_ctc"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out_for_ctc"]] + ) + else: + encoder_out["encoder_out_for_ctc"] = encoder_out[ + "encoder_out_for_ctc" + ].index_select(1, new_order) + + return encoder_out + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. + + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. 
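+
+        Here the CTC-specific "encoder_out_for_ctc" entry is removed so that only
+        the standard encoder outputs are returned.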
+ """ + encoder_out = self.w2v_model.forward_torchscript(net_input) + if "encoder_out_for_ctc" in encoder_out: + del encoder_out['encoder_out_for_ctc'] + + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/models/finetune_asr.py b/SpeechT5/YiTrans/yitrans_iwslt22/models/finetune_asr.py new file mode 100644 index 0000000000000000000000000000000000000000..a89b9676788c0d48b1f400b1ce4c3d3636ce008c --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/models/finetune_asr.py @@ -0,0 +1,460 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +import contextlib +from argparse import Namespace +from typing import Any, Optional + +import torch +import torch.nn as nn +import pickle +from dataclasses import dataclass, field +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models import BaseFairseqModel, FairseqEncoder, register_model +from fairseq.models.hubert.hubert_asr import HubertCtcConfig +from fairseq.tasks import FairseqTask +from omegaconf import II, MISSING + +from yitrans_iwslt22.modules import MultimodalTransformerDecoder + +logger = logging.getLogger(__name__) + +@dataclass +class HubertAsrConfig(HubertCtcConfig): + # for decoder + decoder_layerdrop: float = field( + default=0.1, + metadata={"help": "probability of dropping a decoder layer in hubert"}, + ) + add_decoder: bool = field( + default=False, + metadata={"help": "whether to add decoder for CE Loss on code"}, + ) + reuse_text_emb: bool = field( + default=False, + metadata={"help": "reuse text token embeddings instead of initialize randomly"}, + ) + freeze_decoder_updates: int = field( + default=0, + metadata={"help": "dont finetune hubert for this many updates"}, + ) + share_decoder_input_output_embed: bool = field( + default=False, + metadata={"help": "share decoder input and output embeddings"}, + ) + share_enc_dec_embeddings: bool = field( + default=False, + metadata={"help": "share embeddings of (text encoder, text decoder)"}, + ) + share_s2t_t2t_embeddings: bool = field( + default=False, + metadata={"help": "share embeddings of (speech2text(code), text2text)"}, + ) + share_ctc_decoder_embed: bool = field( + default=False, + metadata={"help": "share ctc and decoder embedding (only when share_decoder_input_output_embed is true)"}, + ) + enc_grad_mult: float = field( + default=1.0, + metadata={"help": "reset feature grad 
mult in hubert to this (only for st2t)"}, + ) + retain_dict_path: Optional[str] = field( + default=None, + metadata={"help": "delete embeddings according to this path"}, + ) + load_step2_model_from: Optional[str] = field( + default=None, + metadata={ + "help": "load step2 model from" + }, + ) + load_pretrained_mbart_from: Optional[str] = field( + default=None, + metadata={ + "help": "model to take text encoder decoder weights from (for initialization)" + }, + ) + load_pretrained_w2v_from: Optional[str] = field( + default=None, + metadata={ + "help": "model to take speech encoder weights from (for initialization)" + }, + ) + use_rel_pos_enc: bool = field( + default=True, + metadata={"help": "whether to use relative positional encoding"}, + ) + encoder_layers: int = field( + default=12, + metadata={"help": "encoder_layers"}, + ) + add_text_encoder: bool = field( + default=True, + metadata={"help": "add_text_encoder"}, + ) + add_adaptor: bool = field( + default=True, + metadata={"help": "add_adaptor"}, + ) + adaptor_stride: int = field( + default=2, + metadata={"help": "adaptor stride"}, + ) + + +@register_model("yitrans_asr", dataclass=HubertAsrConfig) +class YitransASR(BaseFairseqModel): + def __init__(self, cfg: HubertAsrConfig, w2v_encoder: BaseFairseqModel): + super().__init__() + self.cfg = cfg + self.w2v_encoder = w2v_encoder + + ### in case we need load hubert_step2 model + if cfg.load_step2_model_from: + logger.info(f"Loading hubert_step2 pretrained model for finetuning: {cfg.load_step2_model_from}") + hubert_step2_states = self.w2v_encoder.w2v_model.load_checkpoint(cfg.load_step2_model_from)["model"] + if cfg.retain_dict_path is not None: + assert self.w2v_encoder.w2v_model.add_text_modality, "Mustc have text modality if retain dict path" + logger.info("Cut embedding to a smaller size according to retain dict") + with open(cfg.retain_dict_path, "rb") as fp: + overlap_idxs = pickle.load(fp) + hubert_step2_states['w2v_encoder.w2v_model.decoder.output_projection.0.weight'] = hubert_step2_states['w2v_encoder.w2v_model.decoder.output_projection.0.weight'][overlap_idxs] + hubert_step2_states["w2v_encoder.w2v_model.decoder.embed_tokens_list.0.weight"] = hubert_step2_states["w2v_encoder.w2v_model.decoder.embed_tokens_list.0.weight"][overlap_idxs] + hubert_step2_states["w2v_encoder.proj.weight"] = hubert_step2_states["w2v_encoder.proj.weight"][overlap_idxs] + try: + self.load_state_dict(hubert_step2_states, strict=True) + except Exception as e: + logger.warn(e) + self.load_state_dict(hubert_step2_states, strict=False) + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: HubertAsrConfig, task: FairseqTask): + """Build a new model instance.""" + w2v_encoder = HubertEncoder(cfg, task.target_dictionary) + return cls(cfg, w2v_encoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + if "encoder_out" not in net_output: + return self.w2v_encoder.get_normalized_probs_decoder(net_output, log_probs, sample) + + if "encoder_out_for_ctc" in net_output: + logits = net_output["encoder_out_for_ctc"] + else: + logits = net_output["encoder_out"] + + if isinstance(logits, list): + logits = logits[0] + + if log_probs: + return utils.log_softmax(logits.float(), dim=-1) + else: + return utils.softmax(logits.float(), dim=-1) + + def get_logits(self, net_output): + logits = 
net_output["encoder_out"] + padding = net_output["encoder_padding_mask"] + if padding is not None and padding.any(): + padding = padding.T + logits[padding][..., 0] = 0 + logits[padding][..., 1:] = float("-inf") + + return logits + + def forward(self, **kwargs): + x = self.w2v_encoder(**kwargs) + return x + + @property + def encoder(self): + return self.w2v_encoder + + def reorder_encoder_out(self, encoder_out, new_order): + return self.encoder.reorder_encoder_out(encoder_out, new_order) + + @property + def decoder(self): + return self.w2v_encoder.w2v_model.decoder + + +class HubertEncoder(FairseqEncoder): + def __init__(self, cfg: HubertAsrConfig, tgt_dict=None): + self.apply_mask = cfg.apply_mask + logger.info(f"self.apply_mask: {self.apply_mask}") + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "decoder_layerdrop": cfg.decoder_layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + "decoder_dict_size": len(tgt_dict) if cfg.add_decoder else -1, + "share_decoder_input_output_embed": cfg.share_decoder_input_output_embed, + "load_pretrained_w2v_from": cfg.load_pretrained_w2v_from, + "load_pretrained_mbart_from": cfg.load_pretrained_mbart_from, + "adaptor_stride": cfg.adaptor_stride, + } + + if cfg.no_pretrained_weights: + arg_overrides["use_rel_pos_enc"] = cfg.use_rel_pos_enc + arg_overrides["encoder_layers"] = cfg.encoder_layers + arg_overrides["add_text_encoder"] = cfg.add_text_encoder + arg_overrides["share_enc_dec_embeddings"] = cfg.share_enc_dec_embeddings + arg_overrides["share_s2t_t2t_embeddings"] = cfg.share_s2t_t2t_embeddings + arg_overrides["add_adaptor"] = cfg.add_adaptor + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + ## in speech_text_joint_to_text, data is loaded by soundfile, which returns without normalization + if cfg.normalize != w2v_args.task.normalize: + logger.warn( + "Fine-tuning works best when data normalization is the same. 
" + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + w2v_args.task.data = cfg.data + if hasattr(w2v_args.task, "text_cfg"): + w2v_args.task.text_cfg.data_config = None + w2v_args.task.add_decoder = cfg.add_decoder + task = tasks.setup_task(w2v_args.task) + if state is not None and "task_state" in state: + # This will load the stored "dictionaries" object + task.load_state_dict(state["task_state"]) + model = task.build_model(w2v_args.model) + + ### delete the embed_tokens and output_projection of decoder + if state is not None and not cfg.no_pretrained_weights: + if cfg.retain_dict_path is not None: + assert model.add_text_modality, "Mustc have text modality if retain dict path" + logger.info("Cut embedding to a smaller size according to ratin dict") + with open(cfg.retain_dict_path, "rb") as fp: + overlap_idxs = pickle.load(fp) + state['model']['decoder.output_projection.1.weight'] = state['model']['decoder.output_projection.1.weight'][overlap_idxs] + state["model"]["decoder.embed_tokens_list.1.weight"] = state["model"]["decoder.embed_tokens_list.1.weight"][overlap_idxs] + if cfg.reuse_text_emb: + assert model.add_text_modality, "Mustc have text modality if reuse text embed" + logger.info("Loading text-text pretrained token-embedding for speech-text finetuning...") + state["model"]["decoder.embed_tokens_list.0.weight"] = state["model"]["decoder.embed_tokens_list.1.weight"] + del state["model"]["decoder.embed_tokens_list.1.weight"] + state["model"]["decoder.output_projection.0.weight"] = state["model"]["decoder.output_projection.1.weight"] + del state["model"]["decoder.output_projection.1.weight"] + try: + model.load_state_dict(state["model"], strict=True) + except Exception as e: + logger.warn(e) + model.load_state_dict(state["model"], strict=False) + else: + for pname in list(state["model"].keys()): + if pname.startswith("decoder.embed_tokens") or pname.startswith("decoder.output_projection"): + del state["model"][pname] + # set strict=False because we omit some modules + model.load_state_dict(state["model"], strict=False) + + ### in case we need load mbart embedding into asr embedding + if cfg.no_pretrained_weights and cfg.load_pretrained_mbart_from and cfg.reuse_text_emb: + logger.info("Loading mbart pretrained token-embedding for speech-text finetuning...") + mbart_dec_states = model.decoder.state_dict() + loading_states = {} + if cfg.retain_dict_path is not None: + logger.info("Cut embedding to a smaller size according to ratin dict") + with open(cfg.retain_dict_path, "rb") as fp: + overlap_idxs = pickle.load(fp) + loading_states["output_projection.0.weight"] = mbart_dec_states['output_projection.1.weight'][overlap_idxs] + loading_states["embed_tokens_list.0.weight"] = mbart_dec_states['embed_tokens_list.1.weight'][overlap_idxs] + else: + loading_states["output_projection.0.weight"] = mbart_dec_states['output_projection.1.weight'] + loading_states["embed_tokens_list.0.weight"] = mbart_dec_states['embed_tokens_list.1.weight'] + model.decoder.load_state_dict(loading_states, strict=False) + + model.remove_pretraining_modules() + + super().__init__(task.source_dictionary) + + d = w2v_args.model.encoder_embed_dim + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.freeze_decoder_updates = cfg.freeze_decoder_updates + self.num_updates = 0 + + if cfg.share_ctc_decoder_embed: + assert cfg.add_decoder and cfg.share_decoder_input_output_embed, "Must share 
decoder input and output embed before share ctc and decoder embed" + if isinstance(self.w2v_model.decoder, MultimodalTransformerDecoder): + self.proj = nn.Linear( + self.w2v_model.decoder.embed_tokens_list[0].weight.shape[1], + self.w2v_model.decoder.embed_tokens_list[0].weight.shape[0], + bias=False, + ) + self.proj.weight = self.w2v_model.decoder.embed_tokens_list[0].weight + else: + self.proj = nn.Linear( + self.w2v_model.decoder.embed_tokens.weight.shape[1], + self.w2v_model.decoder.embed_tokens.weight.shape[0], + bias=False, + ) + self.proj.weight = self.w2v_model.decoder.embed_tokens.weight + elif tgt_dict is not None: + self.proj = Linear(d, len(tgt_dict)) + elif getattr(cfg, "decoder_embed_dim", d) != d: + self.proj = Linear(d, cfg.decoder_embed_dim) + else: + self.proj = None + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, source, padding_mask, prev_output_tokens=None, tbc=True, **kwargs): + + ft = self.freeze_finetune_updates <= self.num_updates + w2v_args = { + "source": source, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + "prev_output_tokens": prev_output_tokens, + "ft": ft, + } + + if self.freeze_decoder_updates <= self.num_updates: + self.w2v_model.add_decoder = True + else: + self.w2v_model.add_decoder = False + + x, padding_mask, decoder_out = self.w2v_model.extract_features(**w2v_args) + + if tbc: + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + x = self.final_dropout(x) + + if self.proj: + x = self.proj(x) + + return { + "encoder_out": x, # T x B x C + "encoder_padding_mask": padding_mask, # B x T + "padding_mask": padding_mask, + "decoder_out": decoder_out, + } + + def get_normalized_probs_decoder(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (B, T, D) tensor + return self.w2v_model.get_normalized_probs(net_output, log_probs, sample) + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + if isinstance(encoder_out["encoder_out"], list): + encoder_out["encoder_out"] = ( + [] if len(encoder_out["encoder_out"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out"]] + ) + else: + encoder_out["encoder_out"] = encoder_out[ + "encoder_out" + ].index_select(1, new_order) + if encoder_out["encoder_padding_mask"] is not None: + if isinstance(encoder_out["encoder_padding_mask"], list): + encoder_out["encoder_padding_mask"] = ( + [] if len(encoder_out["encoder_padding_mask"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"]] + ) + else: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(0, new_order) + if "decoder_out" in encoder_out and encoder_out["decoder_out"] is not None: + if isinstance(encoder_out["decoder_out"], list): + encoder_out["decoder_out"] = ( + [] if len(encoder_out["decoder_out"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["decoder_out"]] + ) + else: + encoder_out["decoder_out"] = encoder_out[ + "decoder_out" + ].index_select(0, new_order) + if "encoder_out_for_ctc" in encoder_out and encoder_out["encoder_out_for_ctc"] is not None: + if isinstance(encoder_out["encoder_out_for_ctc"], list): + encoder_out["encoder_out_for_ctc"] = ( + [] if len(encoder_out["encoder_out_for_ctc"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out_for_ctc"]] + ) + else: + 
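+ # "encoder_out_for_ctc" has the same T x B x C layout as "encoder_out",
+ # so beam-search reordering selects new_order along dim 1 (the batch
+ # dimension); the B-first entries handled above ("encoder_padding_mask",
+ # "decoder_out") index dim 0 instead.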
encoder_out["encoder_out_for_ctc"] = encoder_out[ + "encoder_out_for_ctc" + ].index_select(1, new_order) + + return encoder_out + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. + """ + encoder_out = self.w2v_model.forward_torchscript(net_input) + + assert self.proj is not None + encoder_out['encoder_out_for_ctc'] = [self.proj(encoder_out['encoder_out'][0])] + + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/models/finetune_mt.py b/SpeechT5/YiTrans/yitrans_iwslt22/models/finetune_mt.py new file mode 100644 index 0000000000000000000000000000000000000000..78259ee131b79077e5bf4f1df29646ed72def240 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/models/finetune_mt.py @@ -0,0 +1,355 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +import contextlib +from argparse import Namespace +from typing import Any, Optional + +import torch +import torch.nn as nn +from dataclasses import dataclass, field +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models import BaseFairseqModel, FairseqEncoder, register_model +from fairseq.models.hubert.hubert import MASKING_DISTRIBUTION_CHOICES +from fairseq.tasks import FairseqTask +from omegaconf import II, MISSING + +from fairseq.models.hubert.hubert_asr import HubertCtcConfig +from fairseq.models.transformer import TransformerConfig +logger = logging.getLogger(__name__) + + +@dataclass +class HubertMTConfig(HubertCtcConfig): + use_rel_pos_enc: bool = field( + default=True, + metadata={"help": "whether to use relative positional encoding"}, + ) + # for decoder + decoder_layerdrop: float = field( + default=0.1, + metadata={"help": "probability of dropping a decoder layer in hubert"}, + ) + add_decoder: bool = field( + default=False, + metadata={"help": "whether to add decoder for CE Loss on code"}, + ) + reuse_text_emb: bool = field( + default=False, + metadata={"help": "reuse text token embeddings instead of initialize randomly"}, + ) + freeze_decoder_updates: int = field( + default=0, + metadata={"help": "dont finetune hubert for this many updates"}, + ) + share_decoder_input_output_embed: bool = field( + default=False, + metadata={"help": "share decoder input and output embeddings"}, + ) + share_enc_dec_embeddings: bool = field( + 
default=False, + metadata={"help": "share embeddings of (text encoder, text decoder)"}, + ) + share_s2t_t2t_embeddings: bool = field( + default=False, + metadata={"help": "share embeddings of (speech2text(code), text2text)"}, + ) + share_ctc_decoder_embed: bool = field( + default=False, + metadata={"help": "share ctc and decoder embedding (only when share_decoder_input_output_embed is true)"}, + ) + enc_grad_mult: float = field( + default=1.0, + metadata={"help": "reset feature grad mult in hubert to this (only for st2t)"}, + ) + retain_dict_path: Optional[str] = field( + default=None, + metadata={"help": "delete embeddings according to this path"}, + ) + load_pretrained_mbart_from: Optional[str] = field( + default=None, + metadata={ + "help": "model to take text encoder decoder weights from (for initialization)" + }, + ) + text_transformer_encoder_layers: int = field( + default=12, + metadata={"help": "reset text_transformer_encoder_layers"}, + ) + +@register_model("finetune_mt", dataclass=HubertMTConfig) +class YitransMT(BaseFairseqModel): + def __init__(self, cfg: HubertMTConfig, w2v_encoder: BaseFairseqModel): + super().__init__() + self.cfg = cfg + self.w2v_encoder = w2v_encoder + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: HubertMTConfig, task: FairseqTask): + """Build a new model instance.""" + w2v_encoder = HubertEncoder(cfg, task.target_dictionary) + return cls(cfg, w2v_encoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + if "decoder_out" in net_output: + return self.w2v_encoder.get_normalized_probs_decoder(net_output["decoder_out"], log_probs, sample) + + assert "encoder_out" not in net_output + if "encoder_out" not in net_output: + return self.w2v_encoder.get_normalized_probs_decoder(net_output, log_probs, sample) + + + def get_logits(self, net_output): + logits = net_output["encoder_out"] + padding = net_output["encoder_padding_mask"] + if padding is not None and padding.any(): + padding = padding.T + logits[padding][..., 0] = 0 + logits[padding][..., 1:] = float("-inf") + + return logits + + def forward(self, **kwargs): + x = self.w2v_encoder(**kwargs) + return x + + @property + def encoder(self): + return self.w2v_encoder + + def reorder_encoder_out(self, encoder_out, new_order): + return self.encoder.reorder_encoder_out(encoder_out, new_order) + + @property + def decoder(self): + return self.w2v_encoder.w2v_model.decoder + + +class HubertEncoder(FairseqEncoder): + def __init__(self, cfg: HubertMTConfig, tgt_dict=None): + self.apply_mask = cfg.apply_mask + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "decoder_layerdrop": cfg.decoder_layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + "decoder_dict_size": -1, + "add_text_modality": True, + 
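+ # MT fine-tuning always needs the text branch, so the pre-training config
+ # is overridden to build it; the nested load_pretrained_*_from paths are
+ # cleared so that building the model below does not reload those
+ # checkpoints a second time.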
"add_text_encoder": True, + "load_pretrained_mbart_from": None, + "load_pretrained_w2v_from": None, + "text_transformer": { + "encoder":{ + "layers": cfg.text_transformer_encoder_layers, + "layerdrop": cfg.layerdrop, + }, + 'dropout': cfg.dropout, + 'attention_dropout': cfg.attention_dropout, + 'activation_dropout': cfg.activation_dropout, + } + } + if cfg.no_pretrained_weights: + arg_overrides["use_rel_pos_enc"] = cfg.use_rel_pos_enc + arg_overrides["share_enc_dec_embeddings"] = cfg.share_enc_dec_embeddings + arg_overrides["share_s2t_t2t_embeddings"] = cfg.share_s2t_t2t_embeddings + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu( + cfg.w2v_path, arg_overrides + ) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + # logger.info("---------------------state.keys()-------------------------------------------") + # logger.info(state.keys()) + # logger.info("---------------------w2v_args.task-------------------------------------------") + # logger.info(w2v_args.task) + # logger.info("---------------------w2v_args.model-------------------------------------------") + # logger.info(w2v_args.model) + # logger.info("----------------------------------------------------------------") + + w2v_args.task.data = cfg.data + w2v_args.task.text_cfg.text_data = cfg.data + w2v_args.task.text_cfg.data_config = None + task = tasks.setup_task(w2v_args.task) + + if state is not None and "task_state" in state: + # This will load the stored "dictionaries" object + task.load_state_dict(state["task_state"]) + + model = task.build_model(w2v_args.model) + + + ### load mbart if specificed + if cfg.load_pretrained_mbart_from is not None and cfg.no_pretrained_weights: + logger.info("Loading mbart....") + mbart_model_state = model.load_checkpoint(cfg.load_pretrained_mbart_from) + model.text_encoder = model.load_pretrained_component_from_model( + component=model.text_encoder, state=mbart_model_state + ) + model.decoder = model.load_pretrained_component_from_model( + component=model.decoder, state=mbart_model_state + ) + + if state is not None and not cfg.no_pretrained_weights: + logger.info("Loading pre-trained models....") + model.load_state_dict(state["model"], strict=True) + + ### remove_pretraining_modules model.remove_pretraining_modules() + model.target_glu = None + model.final_proj = None + model.feature_extractor = None + model.post_extract_proj = None + model.encoder = None + + + + dropout_keys = [ n for n in w2v_args.model.text_transformer if n.find("drop") >= 0 ] + for key in dropout_keys: + logger.info(f"{key}: {w2v_args.model.text_transformer[key]}") + + super().__init__(task.source_dictionary) + + d = w2v_args.model.encoder_embed_dim + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.freeze_decoder_updates = cfg.freeze_decoder_updates + self.num_updates = 0 + + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, src_tokens, src_lengths, prev_output_tokens, tbc=True, **kwargs): + + # ft = self.freeze_finetune_updates <= self.num_updates + w2v_args = { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + 
"mask": self.apply_mask and self.training, + "prev_output_tokens": prev_output_tokens, + } + + results = self.w2v_model(**w2v_args) + return results + + def get_normalized_probs_decoder(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (B, T, D) tensor + return self.w2v_model.get_normalized_probs(net_output, log_probs, sample) + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + if isinstance(encoder_out["encoder_out"], list): + encoder_out["encoder_out"] = ( + [] if len(encoder_out["encoder_out"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out"]] + ) + else: + encoder_out["encoder_out"] = encoder_out[ + "encoder_out" + ].index_select(1, new_order) + if encoder_out["encoder_padding_mask"] is not None: + if isinstance(encoder_out["encoder_padding_mask"], list): + encoder_out["encoder_padding_mask"] = ( + [] if len(encoder_out["encoder_padding_mask"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"]] + ) + else: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(0, new_order) + if "decoder_out" in encoder_out and encoder_out["decoder_out"] is not None: + if isinstance(encoder_out["decoder_out"], list): + encoder_out["decoder_out"] = ( + [] if len(encoder_out["decoder_out"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["decoder_out"]] + ) + else: + encoder_out["decoder_out"] = encoder_out[ + "decoder_out" + ].index_select(0, new_order) + if "encoder_out_for_ctc" in encoder_out and encoder_out["encoder_out_for_ctc"] is not None: + if isinstance(encoder_out["encoder_out_for_ctc"], list): + encoder_out["encoder_out_for_ctc"] = ( + [] if len(encoder_out["encoder_out_for_ctc"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out_for_ctc"]] + ) + else: + encoder_out["encoder_out_for_ctc"] = encoder_out[ + "encoder_out_for_ctc" + ].index_select(1, new_order) + + return encoder_out + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. + + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. 
+ """ + encoder_out = self.w2v_model.forward_torchscript(net_input) + if "encoder_out_for_ctc" in encoder_out: + del encoder_out['encoder_out_for_ctc'] + + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/models/finetune_st.py b/SpeechT5/YiTrans/yitrans_iwslt22/models/finetune_st.py new file mode 100644 index 0000000000000000000000000000000000000000..37e75bee1f4f669333cd275ad715466571be9c69 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/models/finetune_st.py @@ -0,0 +1,434 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +import contextlib +import pickle +from argparse import Namespace +from typing import Any, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from dataclasses import dataclass, field +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models import BaseFairseqModel, FairseqEncoder, register_model +from fairseq.models.hubert.hubert_asr import HubertCtcConfig, HubertAsrConfig +from fairseq.tasks import FairseqTask +from fairseq.data.data_utils import lengths_to_padding_mask +from omegaconf import II, open_dict + + +logger = logging.getLogger(__name__) + +@dataclass +class HubertSt2tCtcConfig(HubertCtcConfig): + load_speech_only: bool = II("task.load_speech_only") + ## for decoder overrides + decoder_layerdrop: float = field( + default=0.1, + metadata={"help": "probability of dropping a decoder layer in hubert"}, + ) + add_decoder: bool = field( + default=False, + metadata={"help": "whether to add decoder for CE Loss on code"}, + ) + reuse_text_emb: bool = field( + default=False, + metadata={"help": "reuse text token embeddings instead of initialize randomly"}, + ) + freeze_decoder_updates: int = field( + default=0, + metadata={"help": "dont finetune hubert for this many updates"}, + ) + # share_enc_dec_embeddings: bool = field( + # default=False, + # metadata={"help": "share embeddings of (text encoder, text decoder)"}, + # ) + share_s2t_t2t_embeddings: bool = field( + default=False, + metadata={"help": "share embeddings of (speech2text(code), text2text)"}, + ) + share_ctc_decoder_embed: bool = field( + default=False, + metadata={"help": "share ctc and decoder embedding (only when share_decoder_input_output_embed is true)"}, + ) + enc_grad_mult: float = field( + default=1.0, + metadata={"help": "reset feature grad mult in hubert to this (only for st2t)"}, + ) + 
retain_dict_path: Optional[str] = field( + default=None, + metadata={"help": "delete embeddings according to this path"}, + ) + load_step2_model_from: Optional[str] = field( + default=None, + metadata={ + "help": "load step2 model from" + }, + ) + + # for other overrides + adaptor_stride: int = field( + default=2, + metadata={"help": "adaptor stride"}, + ) + +@register_model("hubert_st2t", dataclass=HubertSt2tCtcConfig) +class HubertST2T(BaseFairseqModel): + def __init__(self, cfg: HubertSt2tCtcConfig, w2v_encoder: BaseFairseqModel): + super().__init__() + self.cfg = cfg + self.w2v_encoder = w2v_encoder + self.num_updates = 0 + + ### in case we need load hubert_step2 model + if cfg.load_step2_model_from: + logger.info(f"Loading hubert_step2 pretrained model for finetuning: {cfg.load_step2_model_from}") + hubert_step2_states = self.w2v_encoder.w2v_model.load_checkpoint(cfg.load_step2_model_from)["model"] + if cfg.retain_dict_path is not None: + with open(cfg.retain_dict_path, "rb") as fp: + overlap_idxs = pickle.load(fp) + if hubert_step2_states['w2v_encoder.w2v_model.decoder.output_projection.0.weight'].size(0) != len(overlap_idxs): + assert self.w2v_encoder.w2v_model.add_text_modality, "Mustc have text modality if retain dict path" + logger.info("Cut embedding to a smaller size according to retain dict") + hubert_step2_states['w2v_encoder.w2v_model.decoder.output_projection.0.weight'] = hubert_step2_states['w2v_encoder.w2v_model.decoder.output_projection.0.weight'][overlap_idxs] + hubert_step2_states["w2v_encoder.w2v_model.decoder.embed_tokens_list.0.weight"] = hubert_step2_states["w2v_encoder.w2v_model.decoder.embed_tokens_list.0.weight"][overlap_idxs] + if hubert_step2_states.get("w2v_encoder.w2v_model.text_encoder.embed_tokens.weight") is not None: + hubert_step2_states["w2v_encoder.w2v_model.text_encoder.embed_tokens.weight"] = hubert_step2_states["w2v_encoder.w2v_model.text_encoder.embed_tokens.weight"][overlap_idxs] + else: + logger.info(f"cfg.load_step2_model_from matches the cut embedding dims {len(overlap_idxs)}, no cutting needs to do") + if not self.cfg.load_speech_only and hubert_step2_states.get("w2v_encoder.w2v_model.text_encoder.embed_tokens.weight", None) is None: + hubert_step2_states["w2v_encoder.w2v_model.text_encoder.embed_tokens.weight"] = hubert_step2_states["w2v_encoder.w2v_model.decoder.embed_tokens_list.0.weight"] + try: + self.load_state_dict(hubert_step2_states, strict=True) + except Exception as e: + logger.warn(e) + self.load_state_dict(hubert_step2_states, strict=False) + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: HubertSt2tCtcConfig, task: FairseqTask): + """Build a new model instance.""" + w2v_encoder = HubertEncoder(cfg, task.target_dictionary) + return cls(cfg, w2v_encoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + if "encoder_out" not in net_output: + return self.w2v_encoder.get_normalized_probs_decoder(net_output, log_probs, sample) + + if "encoder_out_for_ctc" in net_output: + logits = net_output["encoder_out_for_ctc"] + else: + logits = net_output["encoder_out"] + + if isinstance(logits, list): + logits = logits[0] + + if log_probs: + return utils.log_softmax(logits.float(), dim=-1) + else: + return utils.softmax(logits.float(), dim=-1) + + def get_logits(self, net_output): + logits = net_output["encoder_out"] + padding 
= net_output["encoder_padding_mask"] + if padding is not None and padding.any(): + padding = padding.T + logits[padding][..., 0] = 0 + logits[padding][..., 1:] = float("-inf") + + return logits + + def forward(self, **kwargs): + x = self.w2v_encoder(**kwargs) + return x + + @property + def encoder(self): + return self.w2v_encoder + + def reorder_encoder_out(self, encoder_out, new_order): + return self.encoder.reorder_encoder_out(encoder_out, new_order) + + @property + def decoder(self): + return self.w2v_encoder.w2v_model.decoder + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + +class HubertEncoder(FairseqEncoder): + def __init__(self, cfg: HubertAsrConfig, tgt_dict=None): + self.apply_mask = cfg.apply_mask + logger.info(f"self.apply_mask: {self.apply_mask}") + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + "mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "decoder_layerdrop": cfg.decoder_layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + "decoder_dict_size": len(tgt_dict) if cfg.add_decoder else -1, + "share_decoder_input_output_embed": cfg.share_decoder_input_output_embed, + "load_pretrained_w2v_from": cfg.load_pretrained_w2v_from, + "load_pretrained_mbart_from": None, + "adaptor_stride": cfg.adaptor_stride, + "share_speech_text_embeddings": cfg.share_speech_text_embeddings, + } + + if cfg.no_pretrained_weights: + arg_overrides["use_rel_pos_enc"] = cfg.use_rel_pos_enc + arg_overrides["encoder_layers"] = cfg.encoder_layers + arg_overrides["add_text_encoder"] = cfg.add_text_encoder + arg_overrides["share_all_embeddings"] = cfg.share_all_embeddings + arg_overrides["add_adaptor"] = cfg.add_adaptor + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + ## in speech_text_joint_to_text, data is loaded by soundfile, which returns without normalization + self.need_preprocess = w2v_args.task.normalize + logger.warn("We need normalize the input wavform from the src_tokens") + + if cfg.normalize != w2v_args.task.normalize: + logger.warn( + "Fine-tuning works best when data normalization is the same. 
" + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + if not "share_speech_text_embeddings" in w2v_args.model: + with open_dict(w2v_args.model): + w2v_args.model.share_speech_text_embedding = cfg.share_speech_text_embeddings + logger.info(f"share_speech_text_embeddings: {(getattr(w2v_args.model, 'share_speech_text_embeddings', False))}") + w2v_args.task.data = cfg.data + w2v_args.task.add_decoder = cfg.add_decoder + assert w2v_args.model._name == "hubert" + + task = tasks.setup_task(w2v_args.task) + if state is not None and "task_state" in state: + # This will load the stored "dictionaries" object + task.load_state_dict(state["task_state"]) + model = task.build_model(w2v_args.model) + + ### modify the embed_tokens and output_projection of decoder + if state is not None and not cfg.no_pretrained_weights: + model_states = self.modify_states(state['model'], cfg.retain_dict_path, cfg.reuse_text_emb) + try: + model.load_state_dict(model_states, strict=True) + except Exception as e: + logger.warn(e) + model.load_state_dict(model_states, strict=False) + + ### in case we need load mbart + if cfg.no_pretrained_weights and cfg.load_pretrained_mbart_from: + logger.info("Loading mbart ...") + mbart_state = model.load_checkpoint(cfg.load_pretrained_mbart_from) + mbart_state["model"] = self.modify_states(mbart_state["model"], cfg.retain_dict_path, cfg.reuse_text_emb, is_mbart=True) + model.text_encoder = model.load_pretrained_component_from_model( + component=model.text_encoder, state=mbart_state + ) + model.decoder = model.load_pretrained_component_from_model( + component=model.decoder, state=mbart_state + ) + + model.remove_pretraining_modules(step2=(not cfg.load_speech_only)) + # model.remove_pretraining_modules() + + super().__init__(task.source_dictionary) + + d = w2v_args.model.encoder_embed_dim + + self.w2v_model = model + + self.final_dropout = nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.freeze_decoder_updates = cfg.freeze_decoder_updates + self.num_updates = 0 + self.enc_grad_mult = cfg.enc_grad_mult + + def modify_states(self, model_states, retain_dict_path=None, reuse_text_emb=False, is_mbart=False): + if retain_dict_path is not None: + logger.info("Cut embedding to a smaller size according to retain dict") + with open(retain_dict_path, "rb") as fp: + overlap_idxs = pickle.load(fp) + if is_mbart: + model_states["decoder.embed_tokens_list.1.weight"] = model_states["decoder.embed_tokens.weight"][overlap_idxs] + model_states["decoder.output_projection.1.weight"] = model_states["decoder.output_projection.weight"][overlap_idxs] + model_states["decoder.embed_tokens.weight"] = model_states["decoder.embed_tokens.weight"][overlap_idxs] + model_states["decoder.output_projection.weight"] = model_states["decoder.output_projection.weight"][overlap_idxs] + model_states["encoder.embed_tokens.weight"] = model_states["encoder.embed_tokens.weight"][overlap_idxs] + else: + model_states['decoder.output_projection.1.weight'] = model_states['decoder.output_projection.1.weight'][overlap_idxs] + model_states["decoder.embed_tokens_list.1.weight"] = model_states["decoder.embed_tokens_list.1.weight"][overlap_idxs] + model_states["text_encoder.embed_tokens.weight"] = model_states["text_encoder.embed_tokens.weight"][overlap_idxs] + if reuse_text_emb: + logger.info("Loading decoder.embed_tokens_list.0 <-- decoder.embed_tokens_list.1") + model_states["decoder.embed_tokens_list.0.weight"] = 
model_states["decoder.embed_tokens_list.1.weight"] + model_states["decoder.output_projection.0.weight"] = model_states["decoder.output_projection.1.weight"] + del model_states["decoder.embed_tokens_list.1.weight"] + del model_states["decoder.output_projection.1.weight"] + return model_states + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, src_tokens=None, src_lengths=None, src_txt_tokens=None, src_txt_lengths=None, prev_output_tokens=None, tbc=True, **kwargs): + padding_mask = lengths_to_padding_mask(src_lengths) + if self.need_preprocess: + src_tokens = torch.stack([F.layer_norm(wav, wav.shape) for wav in src_tokens]) + src_tokens[padding_mask] = 0.0 + + ft = self.freeze_finetune_updates <= self.num_updates + w2v_args = { + "source": src_tokens, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + "prev_output_tokens": prev_output_tokens, + "ft": ft, + "enc_grad_mult": self.enc_grad_mult, + } + + if self.freeze_decoder_updates <= self.num_updates: + self.w2v_model.add_decoder = True + else: + self.w2v_model.add_decoder = False + + x, padding_mask, decoder_out = self.w2v_model.extract_features(**w2v_args) + + if tbc: + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + x = self.final_dropout(x) + + if src_txt_tokens is not None: + w2v_args_text = { + "src_tokens": src_txt_tokens, + "src_lengths": src_txt_lengths, + "prev_output_tokens": prev_output_tokens, + } + + decoder_output_text = self.w2v_model(**w2v_args_text) + decoder_out = (torch.cat([decoder_out[0], decoder_output_text['decoder_out'][0]], dim=0), {"attn_cost": None}) + + return decoder_out + + def get_normalized_probs_decoder(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (B, T, D) tensor + return self.w2v_model.get_normalized_probs(net_output, log_probs, sample) + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + if isinstance(encoder_out["encoder_out"], list): + encoder_out["encoder_out"] = ( + [] if len(encoder_out["encoder_out"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out"]] + ) + else: + encoder_out["encoder_out"] = encoder_out[ + "encoder_out" + ].index_select(1, new_order) + if encoder_out["encoder_padding_mask"] is not None: + if isinstance(encoder_out["encoder_padding_mask"], list): + encoder_out["encoder_padding_mask"] = ( + [] if len(encoder_out["encoder_padding_mask"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"]] + ) + else: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(0, new_order) + if "decoder_out" in encoder_out and encoder_out["decoder_out"] is not None: + if isinstance(encoder_out["decoder_out"], list): + encoder_out["decoder_out"] = ( + [] if len(encoder_out["decoder_out"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["decoder_out"]] + ) + else: + encoder_out["decoder_out"] = encoder_out[ + "decoder_out" + ].index_select(0, new_order) + + return encoder_out + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. + + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. 
+ """ + padding_mask = lengths_to_padding_mask(net_input["src_lengths"]) + src_tokens = net_input["src_tokens"] + if self.need_preprocess: + src_tokens = torch.stack([F.layer_norm(wav, wav.shape) for wav in src_tokens]) + src_tokens[padding_mask] = 0.0 + + _net_input = { + "source": src_tokens, + "padding_mask": padding_mask, + } + + encoder_out = self.w2v_model.forward_torchscript(_net_input) + + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/models/pretrain_ed.py b/SpeechT5/YiTrans/yitrans_iwslt22/models/pretrain_ed.py new file mode 100644 index 0000000000000000000000000000000000000000..a07fab74df97ca424f2201d3e8f8826ece2c82c4 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/models/pretrain_ed.py @@ -0,0 +1,698 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +import contextlib +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple, Union +from collections import OrderedDict + +import copy +import torch +from omegaconf import II + +from fairseq import checkpoint_utils +from fairseq.data.dictionary import Dictionary +from fairseq.dataclass import ChoiceEnum +from fairseq.models import register_model, FairseqDecoder +from fairseq.models.transformer import ( + TransformerEncoderBase, + TransformerConfig, +) +from fairseq.models.speech_to_text import Conv1dAdaptor +from fairseq.models.transformer import Embedding +from fairseq.file_io import PathManager +from torch import Tensor +from fairseq.models.wav2vec.wav2vec2 import ConvFeatureExtractionModel +from fairseq.modules import GradMultiply + +from fairseq.models.hubert import HubertConfig, HubertModel + +from fairseq.models.wav2vec.wav2vec2 import TransformerEncoder as W2vTransformerEncoder +from yitrans_iwslt22.modules.w2v_encoder import TransformerEncoder +from yitrans_iwslt22.modules.transformer_decoder import TransformerDecoderScriptable +from yitrans_iwslt22.modules.multimodal_transformer_decoder import MultimodalTransformerDecoder +from yitrans_iwslt22.tasks.iwslt_joint_pretraining import ( + JointPretrainingConfig, + JointPretrainingTask, +) + +logger = logging.getLogger(__name__) + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) + + +@dataclass +class JointEDConfig(HubertConfig): + use_rel_pos_enc: bool = field( + default=False, + metadata={"help": "whether to use relative positional encoding"}, + ) + + # 
decoder + decoder_layers: int = field( + default=6, metadata={"help": "num decoder layers in the transformer"} + ) + decoder_embed_dim: int = field( + default=768, metadata={"help": "decoder embedding dimension"} + ) + decoder_ffn_embed_dim: int = field( + default=3072, metadata={"help": "decoder embedding dimension for FFN"} + ) + decoder_attention_heads: int = field( + default=12, metadata={"help": "num decoder attention heads"} + ) + decoder_normalize_before: bool = field( + default=False, + metadata={"help": "apply layernorm before each decoder block"}, + ) + layernorm_embedding: bool = field( + default=False, + metadata={"help": "apply layernorm to embedding for decoder"}, + ) + decoder_layerdrop: float = field( + default=0.1, + metadata={"help": "probability of dropping a tarnsformer layer"}, + ) + share_decoder_input_output_embed: bool = field( + default=False, + metadata={"help": "share decoder input and output embeddings"}, + ) + share_enc_dec_embeddings: bool = field( + default=False, + metadata={"help": "share embeddings of (text encoder, text decoder)"}, + ) + share_s2t_t2t_embeddings: bool = field( + default=False, + metadata={"help": "share embeddings of (speech2text(code), text2text)"}, + ) + decoder_output_dim: int = field( + default=768, metadata={"help": "decoder output dimension"} + ) + max_target_positions: int = field( + default=3000, metadata={"help": "max target position"} + ) + no_scale_embedding: bool = field( + default=False, + metadata={"help": "not scale embedding"}, + ) + adaptive_input: bool = field( + default=False, + metadata={"help": "adaptive input"}, + ) + quant_noise_pq: int = field( + default=0, metadata={"help": "quant noise pq"} + ) + decoder_learned_pos: bool = field( + default=False, + metadata={"help": "decoder learnable positional embedding"}, + ) + no_token_positional_embeddings: bool = field( + default=False, + metadata={"help": "no token positional embeddings"}, + ) + add_text_modality: bool = field( + default=-False, + metadata={"help": "add text modality, mainly used in pretrainnig"}, + ) + add_text_encoder: bool = field( + default=False, + metadata={"help": "add_text_encoder"}, + ) + share_text_encoder: bool = field( + default=True, + metadata={"help": "share text encoder so that speech branch go through it"}, + ) + split_attention: bool = field( + default=False, + metadata={"help": "use shared but split encoders"}, + ) + add_adaptor: bool = field( + default=False, + metadata={"help": "add adaptor and text encoder on the top of speech encoder"}, + ) + adaptor_n_layers: int = field( + default=3, + metadata={"help": "number of layers for adaptor"}, + ) + adaptor_kernel_size: int = field( + default=3, + metadata={"help": "kernel size for adaptor"}, + ) + adaptor_stride: int = field( + default=2, + metadata={"help": "adaptor stride"}, + ) + adaptor_layernorm: bool = field( + default=False, + metadata={"help": "adaptor layernorm"}, + ) + # Finetune related + decoder_dict_size: int = field( + default=-1, + metadata={"help": "decoder dictionary dimension"}, + ) + + # text encoder related, TransformerConfig is used in bart but we only use its enconder + text_transformer: TransformerConfig = TransformerConfig() + + # other + checkpoint_activations: bool = field( + default=False, metadata={"help": "recompute activations and save memory for extra compute"} + ) + + # Load pre-train model + load_pretrained_mbart_from: Optional[str] = field( + default=None, + metadata={ + "help": "model to take text encoder decoder weights from (for initialization)" + }, 
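+ # when set, used below to initialize text_encoder and decoder via
+ # load_pretrained_component_from_model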
+ ) + load_pretrained_w2v_from: Optional[str] = field( + default=None, + metadata={ + "help": "model to take speech encoder weights from (for initialization)" + }, + ) + + # FP16 optimization + required_seq_len_multiple: int = field( + default=1, + metadata={ + "help": "pad the input to encoder such that the sequence length is divisible by multiple" + }, + ) + crop_seq_to_multiple: int = field( + default=1, + metadata={ + "help": "crop convolutional feature extractor output such that the sequence length is divisible by multiple" + }, + ) + +@register_model("joint_ed", dataclass=JointEDConfig) +class JointEDModel(HubertModel): + def __init__( + self, + cfg: JointEDConfig, + task_cfg: JointPretrainingConfig, + dictionaries: List[Dictionary], + text_dictionary: Dictionary = None, + ) -> None: + super().__init__(cfg, task_cfg, dictionaries) + logger.info(f"JointEDModel Config: {cfg}") + + self.encoder = TransformerEncoder(cfg) + + ### build speeech-text joint_pretrain net from: + ### - add_text_modality is false: no text network + ### - add_text_modality is true, add_text_encoder=False: build text embedding + ### - add_text_modality is true, add_text_encoder=True: build text embedding and encoder + assert cfg.add_text_modality + assert cfg.add_text_encoder + assert cfg.share_text_encoder + assert text_dictionary is not None + self.add_text_modality = cfg.add_text_modality + self.add_text_encoder = cfg.add_text_encoder + self.share_text_encoder = cfg.share_text_encoder + + if cfg.share_s2t_t2t_embeddings: + text_dictionary = self.cutting_dictionary(text_dictionary, cfg.decoder_dict_size) + + ### build text encoder + text_encoder_embed_tokens = self.build_embedding( + text_dictionary, cfg.text_transformer.encoder.embed_dim + ) + self.text_encoder = TransformerEncoderBase( + cfg.text_transformer, + text_dictionary, + text_encoder_embed_tokens + ) + + ### build text decoder + self.add_decoder = task_cfg.add_decoder + if self.add_decoder: + # To make sure that the decoder dict size is the same as the fine-tuning tgt_dict size or bpe code dict size + s2t_dec_dict = self.cutting_dictionary(dictionaries[0], cfg.decoder_dict_size) + if text_dictionary is None: + decoder_dict_list = [s2t_dec_dict] + else: + decoder_dict_list = [s2t_dec_dict, text_dictionary] + + decoder_embed_tokens = [ + self.build_embedding(dictionary, cfg.decoder_embed_dim) + for dictionary in decoder_dict_list + ] + + if cfg.share_enc_dec_embeddings and text_dictionary is not None: + assert cfg.share_decoder_input_output_embed, "Must share decoder input-output embed before share encoder-decoder embed" + logger.info("--------------------------------: share input-output embeddings") + decoder_embed_tokens[-1] = text_encoder_embed_tokens + + if cfg.share_s2t_t2t_embeddings: + logger.info("--------------------------------: share s2t-t2t embeddings") + assert len(s2t_dec_dict) == len(text_dictionary), "s2t embed len must be equal to t2t embed len" + decoder_embed_tokens[0] = text_encoder_embed_tokens + + if len(decoder_embed_tokens) == 1: + self.decoder = TransformerDecoderScriptable(cfg, decoder_dict_list[0], decoder_embed_tokens[0]) + else: + self.decoder = MultimodalTransformerDecoder(cfg, decoder_dict_list, decoder_embed_tokens) + + self.add_adaptor = cfg.add_adaptor + if self.add_adaptor: + assert self.add_text_encoder, "Cannot shared encoder for text and speech once add adaptor" + self.adaptor = Conv1dAdaptor( + cfg.encoder_embed_dim, + cfg.decoder_embed_dim, + n_layers=cfg.adaptor_n_layers, + kernel_size=cfg.adaptor_kernel_size, + 
stride=cfg.adaptor_stride, + add_layernorm=cfg.adaptor_layernorm, + ) + + if cfg.load_pretrained_w2v_from is not None: + w2v_model_state = self.load_checkpoint(cfg.load_pretrained_w2v_from) + self.feature_extractor = self.load_pretrained_component_from_model( + component=self.feature_extractor, state=w2v_model_state + ) + + self.encoder = self.load_pretrained_component_from_model( + component=self.encoder, state=w2v_model_state + ) + + self.post_extract_proj.weight = torch.nn.Parameter(w2v_model_state["model"]["post_extract_proj.weight"]) + self.post_extract_proj.bias = torch.nn.Parameter(w2v_model_state["model"]["post_extract_proj.bias"]) + + # self.final_proj.weight = torch.nn.Parameter(w2v_model_state["model"]["final_proj.weight"]) + # self.final_proj.bias = torch.nn.Parameter(w2v_model_state["model"]["final_proj.bias"]) + + self.layer_norm.weight = torch.nn.Parameter(w2v_model_state["model"]["layer_norm.weight"]) + self.layer_norm.bias = torch.nn.Parameter(w2v_model_state["model"]["layer_norm.bias"]) + + # self.label_embs_concat.data = torch.nn.Parameter(w2v_model_state["model"]["label_embs_concat"]) + self.mask_emb.data = torch.nn.Parameter(w2v_model_state["model"]["mask_emb"]) + + if cfg.load_pretrained_mbart_from is not None: + mbart_model_state = self.load_checkpoint(cfg.load_pretrained_mbart_from) + if self.add_text_modality and self.add_text_encoder: + self.text_encoder = self.load_pretrained_component_from_model( + component=self.text_encoder, state=mbart_model_state + ) + if self.add_decoder: + self.decoder = self.load_pretrained_component_from_model( + component=self.decoder, state=mbart_model_state + ) + + def cutting_dictionary(self, dictionary, dict_size): + if dictionary is None or dict_size <= 0: + return dictionary + else: + cut_dictionary = copy.deepcopy(dictionary) + if dict_size > len(cut_dictionary): + for i in range(dict_size - len(cut_dictionary)): + cut_dictionary.symbols.append(f'_{i}_') + else: + cut_dictionary.symbols = cut_dictionary.symbols[:dict_size] + return cut_dictionary + + def build_embedding(self, dictionary, embed_dim): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + return Embedding(num_embeddings, embed_dim, padding_idx) + + @classmethod + def build_model(cls, cfg: HubertConfig, task: JointPretrainingTask): + """Build a new model instance.""" + # Change dict size for bpe code + if hasattr(task, "hubert_tokenizer") and task.hubert_tokenizer is not None and not task.fine_tuning and cfg.decoder_dict_size == -1: + cfg.decoder_dict_size = len(task.hubert_tokenizer.sp) + logger.info(f"Use acoustic pieces as code, set decoder dict size to {len(task.hubert_tokenizer.sp)}") + + text_dictionary = getattr(task, "text_dictionary", None) + model = JointEDModel(cfg, task.cfg, task.dictionaries, text_dictionary) + return model + + def get_normalized_probs( + self, + net_output: Tuple[Tensor, Optional[Dict[str, List[Optional[Tensor]]]]], + log_probs: bool, + sample: Optional[Dict[str, Tensor]] = None, + ): + # net_output['encoder_out'] is a (B, T, D) tensor + lprobs = self.get_normalized_probs_scriptable(net_output, log_probs, sample) + lprobs.batch_first = True + return lprobs + + def forward( + self, + source: torch.Tensor = None, + src_tokens: torch.Tensor = None, + src_lengths: torch.Tensor = None, + target_list: Optional[List[torch.Tensor]] = None, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = True, + features_only: bool = False, + output_layer: Optional[int] = None, + prev_output_tokens: Optional[torch.Tensor] = None, + 
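+ # text_modal_idx appears to select which embedding / output-projection slot
+ # of the multimodal decoder is used for the text branch; the default -1
+ # picks the last (text) modality.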
text_modal_idx: Optional[int] = -1, + ) -> Dict[str, torch.Tensor]: + """output layer is 1-based""" + assert source is not None or src_tokens is not None + if source is not None: + ### 1. go speech cnn-encoder-decoder branch + features = self.forward_features(source) + if target_list is not None: + features, target_list = self.forward_targets(features, target_list) + + features_pen = features.float().pow(2).mean() + + features = features.transpose(1, 2) + features = self.layer_norm(features) + unmasked_features = features.clone() + + if padding_mask is not None: + padding_mask = self.forward_padding_mask(features, padding_mask) + + if self.post_extract_proj is not None: + features = self.post_extract_proj(features) + + features = self.dropout_input(features) + unmasked_features = self.dropout_features(unmasked_features) + + if mask: + x, mask_indices = self.apply_mask(features, padding_mask, target_list) + else: + x = features + mask_indices = None + + # feature: (B, T, D), float + # target: (B, T), long + # x: (B, T, D), float + # padding_mask: (B, T), bool + # mask_indices: (B, T), bool + x, _ = self.encoder( + x, + padding_mask=padding_mask, + layer=None if output_layer is None else output_layer - 1, + ) + + if features_only: + return {"x": x, "padding_mask": padding_mask, "features": features} + + def compute_pred(proj_x, target, label_embs): + # compute logits for the i-th label set + y = torch.index_select(label_embs, 0, target.long()) + negs = label_embs.unsqueeze(1).expand(-1, proj_x.size(0), -1) + if self.target_glu: + y = self.target_glu(y) + negs = self.target_glu(negs) + # proj_x: (S, D) + # y: (S, D) + # negs: (Neg, S, D) + return self.compute_nce(proj_x, y, negs) + + label_embs_list = self.label_embs_concat.split(self.num_classes, 0) + + if not self.skip_masked: + masked_indices = torch.logical_and(~padding_mask, mask_indices) + proj_x_m = self.final_proj(x[masked_indices]) + if self.untie_final_proj: + proj_x_m_list = proj_x_m.chunk(len(target_list), dim=-1) + else: + proj_x_m_list = [proj_x_m for _ in range(len(target_list))] + logit_m_list = [ + compute_pred(proj_x_m, t[masked_indices], label_embs_list[i]) + for i, (proj_x_m, t) in enumerate(zip(proj_x_m_list, target_list)) + ] + else: + logit_m_list = [None for _ in target_list] + + if not self.skip_nomask: + nomask_indices = torch.logical_and(~padding_mask, ~mask_indices) + proj_x_u = self.final_proj(x[nomask_indices]) + if self.untie_final_proj: + proj_x_u_list = proj_x_u.chunk(len(target_list), dim=-1) + else: + proj_x_u_list = [proj_x_u for _ in range(len(target_list))] + + logit_u_list = [ + compute_pred(proj_x_u, t[nomask_indices], label_embs_list[i]) + for i, (proj_x_u, t) in enumerate(zip(proj_x_u_list, target_list)) + ] + else: + logit_u_list = [None for _ in target_list] + + result = { + "logit_m_list": logit_m_list, + "logit_u_list": logit_u_list, + "padding_mask": padding_mask, + "features_pen": features_pen, + } + + x = x.transpose(0, 1) # T x B x C + # adaptor layers + if self.add_adaptor: + x, padding_mask = self.adaptor(x, padding_mask) + + # text encoder layers + if self.add_text_encoder and self.share_text_encoder: + for layer in self.text_encoder.layers: + x = layer( + x, encoder_padding_mask=padding_mask + ) + if self.text_encoder.layer_norm is not None: + x = self.text_encoder.layer_norm(x) + + # decoder layers + if self.add_decoder: + encoder_out = { + "encoder_out": [x], # T x B x C + "encoder_padding_mask": [padding_mask], # B x T + } + assert prev_output_tokens is not None + decoder_out = 
self.decoder( + prev_output_tokens=prev_output_tokens, encoder_out=encoder_out + ) + result['decoder_out'] = decoder_out + else: + ### 2. go text encoder-decoder branch + if self.add_text_encoder: + encoder_out = self.text_encoder( + src_tokens, src_lengths=src_lengths, return_all_hiddens=False + ) + else: + encoder_padding_mask = src_tokens.eq(self.text_padding_idx) + has_pads = src_tokens.device.type == "xla" or encoder_padding_mask.any() + x = self.text_embed_scale * self.text_encoder_embed_tokens(src_tokens) + x = x + self.text_embed_positions(src_tokens) + # x = self.dropout_input(x) + if has_pads: + x = x * (1 - encoder_padding_mask.unsqueeze(-1).type_as(x)) + kwargs={"modality": "text"} if self.split_attention else {} + x, _ = self.encoder( + x, + padding_mask=encoder_padding_mask, + conv_pos=False, + **kwargs, + ) + encoder_out = { + "encoder_out": [x.transpose(0, 1)], # T x B x C + "encoder_padding_mask": [encoder_padding_mask], # B x T + "src_lengths": [src_lengths], + } + + result = {"encoder_out": encoder_out} + if features_only: + return result + assert prev_output_tokens is not None + decoder_out = self.decoder( + prev_output_tokens=prev_output_tokens, encoder_out=encoder_out, modal_idx=text_modal_idx, + ) + result['decoder_out'] = decoder_out + + return result + + def forward_torchscript(self, net_input: Dict[str, Tensor]): + """A TorchScript-compatible version of forward. + + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. + """ + res = self.forward( + mask=False, + features_only=True, + **net_input, + ) + + if "source" in net_input: + res["x"] = res["x"].transpose(0, 1) # T x B x C + + x = res["x"] # T x B x C + padding_mask = res["padding_mask"] + if self.add_adaptor: + x, padding_mask = self.adaptor(x, padding_mask) + + # text encoder layers + if self.add_text_encoder and self.share_text_encoder: + for layer in self.text_encoder.layers: + x = layer( + x, encoder_padding_mask=padding_mask + ) + + if self.text_encoder.layer_norm is not None: + x = self.text_encoder.layer_norm(x) + + res["x"] = x + res["padding_mask"] = padding_mask + + encoder_out = { + "encoder_out": [res["x"]], # T x B x C + "encoder_padding_mask": [res["padding_mask"]], # B x T + } + else: + encoder_out = res["encoder_out"] + if "encoder_states" in encoder_out: + del encoder_out["encoder_states"] + if "src_tokens" in encoder_out: + del encoder_out["src_tokens"] + if "src_tokens" in encoder_out: + del encoder_out["src_lengths"] + return encoder_out + + def extract_features( + self, + source: torch.Tensor, + padding_mask: Optional[torch.Tensor] = None, + mask: bool = False, + ret_conv: bool = False, + output_layer: Optional[int] = None, + prev_output_tokens: Optional[torch.Tensor] = None, + ft: bool = True, + enc_grad_mult: float = 1.0, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """only for speech input""" + with torch.no_grad() if not ft else contextlib.ExitStack(): + res = self.forward( + source, + padding_mask=padding_mask, + mask=mask, + features_only=True, + output_layer=output_layer, + ) + + feature = res["features"] if ret_conv else res["x"] + + res["x"] = res["x"].transpose(0, 1) # T x B x C + x = res["x"] # T x B x C + padding_mask = res["padding_mask"] + if self.add_adaptor: + x, padding_mask = self.adaptor(x, padding_mask) + + # text encoder layers + if self.add_text_encoder and self.share_text_encoder: + for layer in self.text_encoder.layers: + x = layer( + x, encoder_padding_mask=padding_mask + ) + + if self.text_encoder.layer_norm is 
not None:
+                    x = self.text_encoder.layer_norm(x)
+
+            res["x"] = x
+            res["padding_mask"] = padding_mask
+
+        if self.add_decoder and prev_output_tokens is not None:
+            encoder_out = {
+                "encoder_out": [res["x"]],  # T x B x C
+                "encoder_padding_mask": [res["padding_mask"]],  # B x T
+            }
+
+            if enc_grad_mult != 1.0:
+                encoder_out = self.mult_rst_grad(encoder_out, enc_grad_mult)
+
+            assert prev_output_tokens is not None
+            decoder_out = self.decoder(
+                prev_output_tokens=prev_output_tokens,
+                encoder_out=encoder_out,
+            )
+        else:
+            decoder_out = None
+        return feature, res["padding_mask"], decoder_out
+
+    def mult_rst_grad(self, rst, ratio):
+        assert isinstance(rst, dict)  # instead of EncoderOut
+        assert len(rst["encoder_out"]) == 1
+        rst["encoder_out"][0] = GradMultiply.apply(rst["encoder_out"][0], ratio)
+        return rst
+
+
+    def remove_pretraining_modules(self, step2=False):
+        self.target_glu = None
+        self.final_proj = None
+        if self.add_text_modality:
+            # Delete text embeddings of text encoder
+            if not step2:
+                if self.add_text_encoder:
+                    self.text_encoder.embed_tokens = None
+                    if hasattr(self.text_encoder, "embed_positions"):
+                        self.text_encoder.embed_positions = None
+                    if hasattr(self.text_encoder, "layernorm_embedding"):
+                        self.text_encoder.layernorm_embedding = None
+                else:
+                    self.text_encoder_embed_tokens = None
+                    self.text_embed_positions = None
+            if isinstance(self.decoder, MultimodalTransformerDecoder):
+                # Delete text embeddings of decoder
+                self.decoder.embed_tokens_list = self.decoder.embed_tokens_list[:1]
+                self.decoder.output_projection = self.decoder.output_projection[:1]
+
+    def load_checkpoint(self, checkpoint: str):
+        if not PathManager.exists(checkpoint):
+            raise IOError("Model file not found: {}".format(checkpoint))
+        state = checkpoint_utils.load_checkpoint_to_cpu(checkpoint)
+        return state
+
+    def load_pretrained_component_from_model(
+        self, component: Union[TransformerEncoderBase, TransformerEncoder, W2vTransformerEncoder, FairseqDecoder, ConvFeatureExtractionModel], state
+    ):
+        """
+        Load a pretrained FairseqEncoder or FairseqDecoder from checkpoint into the
+        provided `component` object. If state_dict fails to load, there may be a
+        mismatch in the architecture of the corresponding `component` found in the
+        `checkpoint` file.
+        """
+        if isinstance(component, (TransformerEncoderBase, TransformerEncoder, W2vTransformerEncoder)):
+            component_type = "encoder"
+        elif isinstance(component, FairseqDecoder):
+            component_type = "decoder"
+            if isinstance(component, MultimodalTransformerDecoder):
+                state["model"]["decoder.embed_tokens_list.1.weight"] = state["model"]["decoder.embed_tokens.weight"]
+                state["model"]["decoder.output_projection.1.weight"] = state["model"]["decoder.output_projection.weight"]
+        elif isinstance(component, ConvFeatureExtractionModel):
+            component_type = "feature_extractor"
+        else:
+            print(component)
+            raise ValueError(
+                "component to load must be either a FairseqEncoder or "
+                "FairseqDecoder. Loading other component types is not supported."
+ ) + component_state_dict = OrderedDict() + for key in state["model"].keys(): + if key.startswith(component_type): + # encoder.input_layers.0.0.weight --> input_layers.0.0.weight + component_subkey = key[len(component_type) + 1 :] + component_state_dict[component_subkey] = state["model"][key] + try: + logger.info(f"Load {component_type}") + component.load_state_dict(component_state_dict, strict=True) + except Exception as e: + logger.warn(e) + component.load_state_dict(component_state_dict, strict=False) + return component diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/models/pretrain_ed_step2.py b/SpeechT5/YiTrans/yitrans_iwslt22/models/pretrain_ed_step2.py new file mode 100644 index 0000000000000000000000000000000000000000..82820bb95d5890db573c96241e3cd6c572adea34 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/models/pretrain_ed_step2.py @@ -0,0 +1,438 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +import logging +import contextlib +from argparse import Namespace +from typing import Any, Optional + +import torch +import torch.nn as nn +import pickle +from dataclasses import dataclass, field +from fairseq import checkpoint_utils, tasks, utils +from fairseq.dataclass import FairseqDataclass +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from fairseq.models import BaseFairseqModel, FairseqEncoder, register_model +from fairseq.models.hubert.hubert import MASKING_DISTRIBUTION_CHOICES +from fairseq.models.hubert.hubert_asr import HubertAsrConfig +from fairseq.tasks import FairseqTask +from omegaconf import II, MISSING + +from yitrans_iwslt22.modules.multimodal_transformer_decoder import MultimodalTransformerDecoder + +logger = logging.getLogger(__name__) + +@dataclass +class JointStep2Config(HubertAsrConfig): + ## for decoder overrides + decoder_layerdrop: float = field( + default=0.1, + metadata={"help": "probability of dropping a decoder layer in hubert"}, + ) + add_decoder: bool = field( + default=False, + metadata={"help": "whether to add decoder for CE Loss on code"}, + ) + reuse_text_emb: bool = field( + default=False, + metadata={"help": "reuse text token embeddings instead of initialize randomly"}, + ) + freeze_decoder_updates: int = field( + default=0, + metadata={"help": "dont finetune hubert for this many updates"}, + ) + # share_enc_dec_embeddings: bool = field( + # default=False, + # metadata={"help": "share embeddings of (text encoder, text decoder)"}, + # ) + share_s2t_t2t_embeddings: bool = field( + default=False, + metadata={"help": "share embeddings of (speech2text(code), text2text)"}, + ) + share_ctc_decoder_embed: bool = field( + default=False, + metadata={"help": "share ctc and decoder embedding (only when share_decoder_input_output_embed is true)"}, + ) + enc_grad_mult: float = field( + default=1.0, + metadata={"help": "reset feature grad mult in hubert to this (only for st2t)"}, + ) + retain_dict_path: Optional[str] = field( + default=None, + metadata={"help": "delete embeddings according to this path"}, + ) + load_step2_model_from: Optional[str] = field( + default=None, + metadata={ + "help": "load step2 
model from" + }, + ) + + # for other overrides + adaptor_stride: int = field( + default=2, + metadata={"help": "adaptor stride"}, + ) + + # ## for reset some configs + # load_pretrained_mbart_from: Optional[str] = field( + # default=None, + # metadata={ + # "help": "model to take text encoder decoder weights from (for initialization)" + # }, + # ) + # load_pretrained_w2v_from: Optional[str] = field( + # default=None, + # metadata={ + # "help": "model to take speech encoder weights from (for initialization)" + # }, + # ) + # use_rel_pos_enc: bool = field( + # default=True, + # metadata={"help": "whether to use relative positional encoding"}, + # ) + # encoder_layers: int = field( + # default=12, + # metadata={"help": "encoder_layers"}, + # ) + # add_text_modality: bool = field( + # default=True, + # metadata={"help": "add_text_modality"}, + # ) + # add_text_encoder: bool = field( + # default=True, + # metadata={"help": "add_text_encoder"}, + # ) + # share_all_embeddings: bool = field( + # default=True, + # metadata={"help": "share text_encoder, decoder_input, decoder_output embeddings"}, + # ) + # add_adaptor: bool = field( + # default=True, + # metadata={"help": "add_adaptor"}, + # ) + + +@register_model("hubert_step2", dataclass=JointStep2Config) +class JointStep2Model(BaseFairseqModel): + def __init__(self, cfg: JointStep2Config, w2v_encoder: BaseFairseqModel): + super().__init__() + self.cfg = cfg + self.w2v_encoder = w2v_encoder + + def upgrade_state_dict_named(self, state_dict, name): + super().upgrade_state_dict_named(state_dict, name) + return state_dict + + @classmethod + def build_model(cls, cfg: JointStep2Config, task: FairseqTask): + """Build a new model instance.""" + w2v_encoder = JointED(cfg, task.target_dictionary) + return cls(cfg, w2v_encoder) + + def get_normalized_probs(self, net_output, log_probs, sample=None): + """Get normalized probabilities (or log probs) from a net's output.""" + if "encoder_out" not in net_output: + return self.w2v_encoder.get_normalized_probs_decoder(net_output, log_probs, sample) + + if "encoder_out_for_ctc" in net_output: + logits = net_output["encoder_out_for_ctc"] + else: + logits = net_output["encoder_out"] + + if isinstance(logits, list): + logits = logits[0] + + if log_probs: + return utils.log_softmax(logits.float(), dim=-1) + else: + return utils.softmax(logits.float(), dim=-1) + + def get_logits(self, net_output): + logits = net_output["encoder_out"] + padding = net_output["encoder_padding_mask"] + if padding is not None and padding.any(): + padding = padding.T + logits[padding][..., 0] = 0 + logits[padding][..., 1:] = float("-inf") + + return logits + + def forward(self, **kwargs): + x = self.w2v_encoder(**kwargs) + return x + + @property + def encoder(self): + return self.w2v_encoder + + def reorder_encoder_out(self, encoder_out, new_order): + return self.encoder.reorder_encoder_out(encoder_out, new_order) + + @property + def decoder(self): + return self.w2v_encoder.w2v_model.decoder + +class JointED(FairseqEncoder): + def __init__(self, cfg: JointStep2Config, tgt_dict=None): + self.apply_mask = cfg.apply_mask + logger.info(f"self.apply_mask: {self.apply_mask}") + + arg_overrides = { + "dropout": cfg.dropout, + "activation_dropout": cfg.activation_dropout, + "dropout_input": cfg.dropout_input, + "attention_dropout": cfg.attention_dropout, + "mask_length": cfg.mask_length, + "mask_prob": cfg.mask_prob, + "mask_selection": cfg.mask_selection, + "mask_other": cfg.mask_other, + "no_mask_overlap": cfg.no_mask_overlap, + 
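+            # channel (feature-dimension) masking overrides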
"mask_channel_length": cfg.mask_channel_length, + "mask_channel_prob": cfg.mask_channel_prob, + "mask_channel_selection": cfg.mask_channel_selection, + "mask_channel_other": cfg.mask_channel_other, + "no_mask_channel_overlap": cfg.no_mask_channel_overlap, + "encoder_layerdrop": cfg.layerdrop, + "decoder_layerdrop": cfg.decoder_layerdrop, + "feature_grad_mult": cfg.feature_grad_mult, + "decoder_dict_size": len(tgt_dict) if cfg.add_decoder else -1, + "share_decoder_input_output_embed": cfg.share_decoder_input_output_embed, + "share_s2t_t2t_embeddings": cfg.share_s2t_t2t_embeddings, + "load_pretrained_w2v_from": None, + "load_pretrained_mbart_from": None, + "adaptor_stride": cfg.adaptor_stride, + } + + if cfg.w2v_args is None: + state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides) + w2v_args = state.get("cfg", None) + if w2v_args is None: + w2v_args = convert_namespace_to_omegaconf(state["args"]) + cfg.w2v_args = w2v_args + else: + state = None + w2v_args = cfg.w2v_args + if isinstance(w2v_args, Namespace): + cfg.w2v_args = w2v_args = convert_namespace_to_omegaconf(w2v_args) + + if cfg.normalize != w2v_args.task.normalize: + logger.warn( + "Fine-tuning works best when data normalization is the same. " + "Please check that --normalize is set or unset for " + "both pre-training and here" + ) + + w2v_args.task.data = cfg.data + if hasattr(w2v_args.task, "text_cfg"): + w2v_args.task.text_cfg.data_config = None + w2v_args.task.add_decoder = cfg.add_decoder + task = tasks.setup_task(w2v_args.task) + if state is not None and "task_state" in state: + # This will load the stored "dictionaries" object + task.load_state_dict(state["task_state"]) + model = task.build_model(w2v_args.model) + + ### delete the embed_tokens and output_projection of decoder + if state is not None and not cfg.no_pretrained_weights: + if cfg.retain_dict_path is not None: + assert model.add_text_modality, "Mustc have text modality if retain dict path" + logger.info("Cut embedding to a smaller size according to ratin dict") + with open(cfg.retain_dict_path, "rb") as fp: + overlap_idxs = pickle.load(fp) + state['model']['decoder.output_projection.1.weight'] = state['model']['decoder.output_projection.1.weight'][overlap_idxs] + state["model"]["decoder.embed_tokens_list.1.weight"] = state["model"]["decoder.embed_tokens_list.1.weight"][overlap_idxs] + if cfg.reuse_text_emb: + assert model.add_text_modality, "Mustc have text modality if reuse text embed" + logger.info("Loading text-text pretrained token-embedding for speech-text finetuning...") + state["model"]["decoder.embed_tokens_list.0.weight"] = state["model"]["decoder.embed_tokens_list.1.weight"] + del state["model"]["decoder.embed_tokens_list.1.weight"] + state["model"]["decoder.output_projection.0.weight"] = state["model"]["decoder.output_projection.1.weight"] + del state["model"]["decoder.output_projection.1.weight"] + try: + model.load_state_dict(state["model"], strict=True) + except Exception as e: + logger.warn(e) + model.load_state_dict(state["model"], strict=False) + else: + for pname in list(state["model"].keys()): + if pname.startswith("decoder.embed_tokens") or pname.startswith("decoder.output_projection"): + del state["model"][pname] + # set strict=False because we omit some modules + model.load_state_dict(state["model"], strict=False) + + model.remove_pretraining_modules(step2=True) + + super().__init__(task.source_dictionary) + + d = w2v_args.model.encoder_embed_dim + + self.w2v_model = model + + self.final_dropout = 
nn.Dropout(cfg.final_dropout) + self.freeze_finetune_updates = cfg.freeze_finetune_updates + self.freeze_decoder_updates = cfg.freeze_decoder_updates + self.num_updates = 0 + + if cfg.share_ctc_decoder_embed: + assert cfg.add_decoder and cfg.share_decoder_input_output_embed, "Must share decoder input and output embed before share ctc and decoder embed" + if isinstance(self.w2v_model.decoder, MultimodalTransformerDecoder): + self.proj = nn.Linear( + self.w2v_model.decoder.embed_tokens_list[0].weight.shape[1], + self.w2v_model.decoder.embed_tokens_list[0].weight.shape[0], + bias=False, + ) + self.proj.weight = self.w2v_model.decoder.embed_tokens_list[0].weight + else: + self.proj = nn.Linear( + self.w2v_model.decoder.embed_tokens.weight.shape[1], + self.w2v_model.decoder.embed_tokens.weight.shape[0], + bias=False, + ) + self.proj.weight = self.w2v_model.decoder.embed_tokens.weight + elif tgt_dict is not None: + self.proj = Linear(d, len(tgt_dict)) + elif getattr(cfg, "decoder_embed_dim", d) != d: + self.proj = Linear(d, cfg.decoder_embed_dim) + else: + self.proj = None + + def set_num_updates(self, num_updates): + """Set the number of parameters updates.""" + super().set_num_updates(num_updates) + self.num_updates = num_updates + + def forward(self, source=None, src_tokens=None, src_lengths=None, padding_mask=None, prev_output_tokens=None, tbc=True, **kwargs): + assert source is not None or src_tokens is not None + if source is not None: + ### 1. go speech cnn-encoder-decoder branch + ft = self.freeze_finetune_updates <= self.num_updates + w2v_args = { + "source": source, + "padding_mask": padding_mask, + "mask": self.apply_mask and self.training, + "prev_output_tokens": prev_output_tokens, + "ft": ft, + } + + if self.freeze_decoder_updates <= self.num_updates: + self.w2v_model.add_decoder = True + else: + self.w2v_model.add_decoder = False + + x, padding_mask, decoder_out = self.w2v_model.extract_features(**w2v_args) + + if tbc: + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + x = self.final_dropout(x) + + if self.proj: + x = self.proj(x) + + return { + "encoder_out": x, # T x B x C + "encoder_padding_mask": padding_mask, # B x T + "padding_mask": padding_mask, + "decoder_out": decoder_out, + } + else: + ### 2. 
go text encoder-decoder branch + w2v_args = { + "src_tokens": src_tokens, + "src_lengths": src_lengths, + "prev_output_tokens": prev_output_tokens, + } + + return self.w2v_model(**w2v_args) + + def get_normalized_probs_decoder(self, net_output, log_probs, sample=None): + # net_output['encoder_out'] is a (B, T, D) tensor + return self.w2v_model.get_normalized_probs(net_output, log_probs, sample) + + def reorder_encoder_out(self, encoder_out, new_order): + if encoder_out["encoder_out"] is not None: + if isinstance(encoder_out["encoder_out"], list): + encoder_out["encoder_out"] = ( + [] if len(encoder_out["encoder_out"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out"]] + ) + else: + encoder_out["encoder_out"] = encoder_out[ + "encoder_out" + ].index_select(1, new_order) + if encoder_out["encoder_padding_mask"] is not None: + if isinstance(encoder_out["encoder_padding_mask"], list): + encoder_out["encoder_padding_mask"] = ( + [] if len(encoder_out["encoder_padding_mask"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["encoder_padding_mask"]] + ) + else: + encoder_out["encoder_padding_mask"] = encoder_out[ + "encoder_padding_mask" + ].index_select(0, new_order) + if "decoder_out" in encoder_out and encoder_out["decoder_out"] is not None: + if isinstance(encoder_out["decoder_out"], list): + encoder_out["decoder_out"] = ( + [] if len(encoder_out["decoder_out"]) == 0 + else [x.index_select(0, new_order) for x in encoder_out["decoder_out"]] + ) + else: + encoder_out["decoder_out"] = encoder_out[ + "decoder_out" + ].index_select(0, new_order) + if "encoder_out_for_ctc" in encoder_out and encoder_out["encoder_out_for_ctc"] is not None: + if isinstance(encoder_out["encoder_out_for_ctc"], list): + encoder_out["encoder_out_for_ctc"] = ( + [] if len(encoder_out["encoder_out_for_ctc"]) == 0 + else [x.index_select(1, new_order) for x in encoder_out["encoder_out_for_ctc"]] + ) + else: + encoder_out["encoder_out_for_ctc"] = encoder_out[ + "encoder_out_for_ctc" + ].index_select(1, new_order) + + return encoder_out + + def forward_torchscript(self, net_input): + """A TorchScript-compatible version of forward. + + Encoders which use additional arguments may want to override + this method for TorchScript compatibility. 
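+
+        Here the wrapped joint model produces the encoder output, and a CTC
+        projection of it is appended as ``encoder_out_for_ctc``, which
+        ``get_normalized_probs`` uses when present.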
+ """ + encoder_out = self.w2v_model.forward_torchscript(net_input) + + assert self.proj is not None + encoder_out['encoder_out_for_ctc'] = [self.proj(encoder_out['encoder_out'][0])] + + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + return None + + def upgrade_state_dict_named(self, state_dict, name): + return state_dict + + +def Embedding(num_embeddings, embedding_dim, padding_idx): + m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx) + nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5) + nn.init.constant_(m.weight[padding_idx], 0) + return m + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/modules/__init__.py b/SpeechT5/YiTrans/yitrans_iwslt22/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6c611e8c6000172b43b1cdb213cd42b68cb3a685 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/modules/__init__.py @@ -0,0 +1,23 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +from .multihead_attention import MultiheadAttention +from .relative_pos_enc import RelativePositionalEncoding +from .transformer_decoder_layer import TransformerDecoderLayerBase +from .w2v_encoder import TransformerEncoder, TransformerSentenceEncoderLayer +from .multimodal_transformer_decoder import MultimodalTransformerDecoder + +__all__ = [ + "MultiheadAttention", + "RelativePositionalEncoding", + "TransformerDecoderLayerBase", + "TransformerEncoder", + "TransformerSentenceEncoderLayer", + "MultimodalTransformerDecoder", +] diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/modules/multihead_attention.py b/SpeechT5/YiTrans/yitrans_iwslt22/modules/multihead_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..7b1c1445037ada5aef5b8cf9fd3b63b05d95aca1 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/modules/multihead_attention.py @@ -0,0 +1,341 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +from typing import Dict, Optional, Tuple + +import torch +import torch.nn.functional as F +from fairseq import utils +from torch import Tensor + +from fairseq.modules import MultiheadAttention as FairseqMultiheadAttention + + +class MultiheadAttention(FairseqMultiheadAttention): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. 
+ """ + + def __init__( + self, + embed_dim, + num_heads, + kdim=None, + vdim=None, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + self_attention=False, + encoder_decoder_attention=False, + q_noise=0.0, + qn_block_size=8, + ): + super().__init__( + embed_dim, + num_heads, + kdim, + vdim, + dropout, + bias, + add_bias_kv, + add_zero_attn, + self_attention, + encoder_decoder_attention, + q_noise, + qn_block_size, + ) + + def forward( + self, + query, + key: Optional[Tensor], + value: Optional[Tensor], + key_padding_mask: Optional[Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + need_weights: bool = True, + static_kv: bool = False, + attn_mask: Optional[Tensor] = None, + before_softmax: bool = False, + need_head_weights: bool = False, + position_bias: Optional[Tensor] = None, + ) -> Tuple[Tensor, Optional[Tensor]]: + """Input shape: Time x Batch x Channel + + Args: + key_padding_mask (ByteTensor, optional): mask to exclude + keys that are pads, of shape `(batch, src_len)`, where + padding elements are indicated by 1s. + need_weights (bool, optional): return the attention weights, + averaged over heads (default: False). + attn_mask (ByteTensor, optional): typically used to + implement causal attention, where the mask prevents the + attention from looking forward in time (default: None). + before_softmax (bool, optional): return the raw attention + weights and values before the attention softmax. + need_head_weights (bool, optional): return the attention + weights for each head. Implies *need_weights*. Default: + return the average attention weights over all heads. + """ + if need_head_weights: + need_weights = True + + is_tpu = query.device.type == "xla" + + tgt_len, bsz, embed_dim = query.size() + src_len = tgt_len + assert embed_dim == self.embed_dim, f"query dim {embed_dim} != {self.embed_dim}" + assert list(query.size()) == [tgt_len, bsz, embed_dim] + if key is not None: + src_len, key_bsz, _ = key.size() + if not torch.jit.is_scripting(): + assert key_bsz == bsz + assert value is not None + assert src_len, bsz == value.shape[:2] + + if ( + not self.onnx_trace + and not is_tpu # don't use PyTorch version on TPUs + and incremental_state is None + and not static_kv + # A workaround for quantization to work. Otherwise JIT compilation + # treats bias in linear module as method. 
+ and not torch.jit.is_scripting() + and position_bias is None + ): + assert key is not None and value is not None + return F.multi_head_attention_forward( + query, + key, + value, + self.embed_dim, + self.num_heads, + torch.empty([0]), + torch.cat((self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), + self.bias_k, + self.bias_v, + self.add_zero_attn, + self.dropout_module.p, + self.out_proj.weight, + self.out_proj.bias, + self.training or self.dropout_module.apply_during_inference, + key_padding_mask, + need_weights, + attn_mask, + use_separate_proj_weight=True, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + ) + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if saved_state is not None and "prev_key" in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + q = self.q_proj(query) + k = self.k_proj(query) + v = self.v_proj(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.q_proj(query) + if key is None: + assert value is None + k = v = None + else: + k = self.k_proj(key) + v = self.v_proj(key) + + else: + assert key is not None and value is not None + q = self.q_proj(query) + k = self.k_proj(key) + v = self.v_proj(value) + q *= self.scaling + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + key_padding_mask.new_zeros(key_padding_mask.size(0), 1), + ], + dim=1, + ) + + q = ( + q.contiguous() + .view(tgt_len, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if k is not None: + k = ( + k.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + if v is not None: + v = ( + v.contiguous() + .view(-1, bsz * self.num_heads, self.head_dim) + .transpose(0, 1) + ) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if "prev_key" in saved_state: + _prev_key = saved_state["prev_key"] + assert _prev_key is not None + prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + assert k is not None + k = torch.cat([prev_key, k], dim=1) + src_len = k.size(1) + if "prev_value" in saved_state: + _prev_value = saved_state["prev_value"] + assert _prev_value is not None + prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + assert v is not None + v = torch.cat([prev_value, v], dim=1) + prev_key_padding_mask: Optional[Tensor] = None + if "prev_key_padding_mask" in saved_state: + prev_key_padding_mask = saved_state["prev_key_padding_mask"] + assert k is not None and v is not None + key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( + key_padding_mask=key_padding_mask, + prev_key_padding_mask=prev_key_padding_mask, + batch_size=bsz, + src_len=k.size(1), + static_kv=static_kv, + ) + + saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) + saved_state["prev_value"] = v.view(bsz, 
self.num_heads, -1, self.head_dim) + saved_state["prev_key_padding_mask"] = key_padding_mask + # In this branch incremental_state is never None + assert incremental_state is not None + incremental_state = self._set_input_buffer(incremental_state, saved_state) + assert k is not None + assert k.size(1) == src_len + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. + if key_padding_mask is not None and key_padding_mask.dim() == 0: + key_padding_mask = None + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if self.add_zero_attn: + assert v is not None + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1 + ) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [ + key_padding_mask, + torch.zeros(key_padding_mask.size(0), 1).type_as( + key_padding_mask + ), + ], + dim=1, + ) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + if position_bias is not None: ## first order + ## position_bias: [241, 241, 64] + #print ("attn_weights: ", attn_weights.size()) # [492, 241, 241] + reshape_q = q.contiguous().view(bsz * self.num_heads, -1, self.head_dim).transpose(0,1) #[241, 492, 64] + #print ("reshape_q: ", reshape_q.size()) + B = torch.matmul(reshape_q, position_bias.transpose(-2, -1)) + #print ("B: ", B.size()) ## [241, 492, 241] + #B = B.transpose(0, 1).view(bsz, self.num_heads, position_bias.size(0), position_bias.size(1)) + B = B.transpose(0, 1).view(bsz*self.num_heads, position_bias.size(0), position_bias.size(1)) + #print ("B 2: ", B.size()) + attn_weights += B + + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if key_padding_mask is not None: + # don't attend to padding symbols + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if not is_tpu: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), + float("-inf"), + ) + else: + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf")) + attn_weights = attn_weights.transpose(0, 2) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + if before_softmax: + return attn_weights, v + + attn_weights_float = utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace + ) + attn_weights = attn_weights_float.type_as(attn_weights) + attn_probs = self.dropout_module(attn_weights) + + assert v is not None + attn = torch.bmm(attn_probs, v) + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + if self.onnx_trace and attn.size(1) == 1: + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, embed_dim) + else: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn = self.out_proj(attn) + attn_weights: Optional[Tensor] = None + if need_weights: + attn_weights = attn_weights_float.view( + 
bsz, self.num_heads, tgt_len, src_len + ).transpose(1, 0) + if not need_head_weights: + # average attention weights over heads + attn_weights = attn_weights.mean(dim=0) + + return attn, attn_weights diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/modules/multimodal_transformer_decoder.py b/SpeechT5/YiTrans/yitrans_iwslt22/modules/multimodal_transformer_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..4d0b5cdd60217a0b27ecb2f60b8bc988e9f4eb65 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/modules/multimodal_transformer_decoder.py @@ -0,0 +1,525 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/transformer/transformer_decoder.py +""" + +import math +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqIncrementalDecoder +from fairseq.models.transformer import TransformerConfig +from fairseq.models.transformer.transformer_decoder import module_name_fordropout, Linear +from fairseq.modules import ( + AdaptiveSoftmax, + BaseLayer, + FairseqDropout, + LayerDropModuleList, + LayerNorm, + PositionalEmbedding, + SinusoidalPositionalEmbedding, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor + +import yitrans_iwslt22.modules.transformer_decoder_layer as transformer_layer +from yitrans_iwslt22.modules.relative_pos_enc import RelativePositionalEncoding + +class MultimodalTransformerDecoderBase(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *cfg.decoder.layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionaries (~fairseq.data.Dictionary): a list of decoding dictionaries + embed_tokens_list (torch.nn.Embedding): a list of output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
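+
+    Unlike the standard fairseq decoder, this class keeps one token embedding
+    and one output projection per modality (typically one for the speech/code
+    vocabulary and one for the text vocabulary); the ``modal_idx`` argument of
+    :meth:`forward` selects which pair is used.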
+ """ + + def __init__( + self, + cfg, + dictionaries, + embed_tokens_list, + no_encoder_attn=False, + output_projection=None, + use_rel_pos_enc=False, + ): + assert all([embed_tokens.padding_idx == embed_tokens_list[0].padding_idx for embed_tokens in embed_tokens_list]) + assert all([embed_tokens.embedding_dim == embed_tokens_list[0].embedding_dim for embed_tokens in embed_tokens_list]) + self.cfg = cfg + super().__init__(dictionaries) + self.register_buffer("version", torch.Tensor([3])) + self._future_mask = torch.empty(0) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.decoder_layerdrop = cfg.decoder.layerdrop + self.share_input_output_embed = cfg.share_decoder_input_output_embed + + input_embed_dim = embed_tokens_list[0].embedding_dim + embed_dim = cfg.decoder.embed_dim + self.embed_dim = embed_dim + self.output_embed_dim = cfg.decoder.output_dim + + self.padding_idx = embed_tokens_list[0].padding_idx + self.max_target_positions = cfg.max_target_positions + + self.embed_tokens_list = nn.ModuleList(embed_tokens_list) + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + self.embed_positions = ( + PositionalEmbedding( + self.max_target_positions, + embed_dim, + self.padding_idx, + learned=cfg.decoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + self.cross_self_attention = cfg.cross_self_attention + + if self.decoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.use_rel_pos_enc = use_rel_pos_enc + self.layers.extend( + [ + self.build_decoder_layer(cfg, no_encoder_attn) + for _ in range(cfg.decoder.layers) + ] + ) + self.num_layers = len(self.layers) + + if cfg.decoder.normalize_before and not cfg.no_decoder_final_norm: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + + self.project_out_dim = ( + Linear(embed_dim, self.output_embed_dim, bias=False) + if embed_dim != self.output_embed_dim and not cfg.tie_adaptive_weights + else None + ) + + self.adaptive_softmax = None + self.output_projection = output_projection + if self.output_projection is None: + self.build_output_projection(cfg, dictionaries, embed_tokens_list) + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(embed_dim // cfg.decoder.attention_heads, 24) + + def build_output_projection(self, cfg, dictionaries, embed_tokens_list): + if cfg.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = nn.ModuleList([ + AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + utils.eval_str_list(cfg.adaptive_softmax_cutoff, type=int), + dropout=cfg.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if cfg.tie_adaptive_weights else None, + factor=cfg.adaptive_softmax_factor, + tie_proj=cfg.tie_adaptive_proj, + ) for (dictionary, embed_tokens) in zip(dictionaries, embed_tokens_list) + ]) + elif self.share_input_output_embed: + 
self.output_projection = nn.ModuleList([ + nn.Linear( + self.embed_tokens_list[i].weight.shape[1], + self.embed_tokens_list[i].weight.shape[0], + bias=False, + ) for i in range(len(self.embed_tokens_list)) + ]) + for i in range(len(self.embed_tokens_list)): + self.output_projection[i].weight = self.embed_tokens_list[i].weight + else: + self.output_projection = nn.ModuleList([ + nn.Linear( + self.output_embed_dim, len(dictionary), bias=False + ) for dictionary in dictionaries + ]) + for i in range(len(self.embed_tokens_list)): + nn.init.normal_( + self.output_projection[i].weight, mean=0, std=self.output_embed_dim ** -0.5 + ) + num_base_layers = cfg.base_layers + for i in range(num_base_layers): + self.layers.insert( + ((i + 1) * cfg.decoder.layers) // (num_base_layers + 1), + BaseLayer(cfg), + ) + + def build_decoder_layer(self, cfg, no_encoder_attn=False): + layer = transformer_layer.TransformerDecoderLayerBase(cfg, no_encoder_attn, has_relative_attention_bias=self.use_rel_pos_enc) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + modal_idx=0, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention, should be of size T x B x C + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). 
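+            modal_idx (int, optional): index of the token embedding and output
+                projection to use for the current batch (default: 0).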
+ + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + modal_idx=modal_idx, + ) + + if not features_only: + x = self.output_layer(x, modal_idx) + return x, extra + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + modal_idx=0, + ): + return self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + modal_idx=modal_idx, + ) + + """ + A scriptable subclass of this class has an extract_features method and calls + super().extract_features, but super() is not supported in torchscript. A copy of + this function is made to be used in the subclass instead. + """ + + def extract_features_scriptable( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + modal_idx=0, + ): + """ + Similar to *forward* but only return features. + + Includes several features from "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + alignment_layer (int, optional): return mean alignment over + heads at this layer (default: last layer). + alignment_heads (int, optional): only average alignment over + this many heads (default: all heads). 
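+            modal_idx (int, optional): index of the token embedding used to
+                embed ``prev_output_tokens`` (default: 0).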
+ + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + bs, slen = prev_output_tokens.size() + if alignment_layer is None: + alignment_layer = self.num_layers - 1 + + enc: Optional[Tensor] = None + padding_mask: Optional[Tensor] = None + if encoder_out is not None and len(encoder_out["encoder_out"]) > 0: + enc = encoder_out["encoder_out"][0] + assert ( + enc.size()[1] == bs + ), f"Expected enc.shape == (t, {bs}, c) got {enc.shape}" + if encoder_out is not None and len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + + # embed positions + positions = None + if self.embed_positions is not None: + positions = self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens_list[modal_idx](prev_output_tokens) + + if self.quant_noise is not None: + x = self.quant_noise(x) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + pos_seq = torch.arange(0, slen).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, _ = self.pos_emb(pos_seq, incremental_state) + else: + pos_k = None + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any(): + self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx) + + # decoder layers + attn: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, _ = layer( + x, + enc, + padding_mask, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer)), + need_head_weights=bool((idx == alignment_layer)), + pos_bias=pos_k, + ) + inner_states.append(x) + if layer_attn is not None and idx == alignment_layer: + attn = layer_attn.float().to(x) + + if attn is not None: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + if self.layer_norm is not None: + x = self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": [attn], "inner_states": inner_states} + + def output_layer(self, features, modal_idx): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + return self.output_projection[modal_idx](features) + else: + return features + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embed_positions is None: + return self.max_target_positions + return min(self.max_target_positions, self.embed_positions.max_positions) + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + # self._future_mask.device != tensor.device is not working in TorchScript. 
This is a workaround. + if ( + self._future_mask.size(0) == 0 + or (not self._future_mask.device == tensor.device) + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(torch.zeros([dim, dim])), 1 + ) + self._future_mask = self._future_mask.to(tensor) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + + if f"{name}.output_projection.weight" not in state_dict: + if self.share_input_output_embed: + embed_out_key = f"{name}.embed_tokens.weight" + else: + embed_out_key = f"{name}.embed_out" + if embed_out_key in state_dict: + state_dict[f"{name}.output_projection.weight"] = state_dict[ + embed_out_key + ] + if not self.share_input_output_embed: + del state_dict[embed_out_key] + + for i in range(self.num_layers): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +class MultimodalTransformerDecoder(MultimodalTransformerDecoderBase): + def __init__( + self, + args, + dictionaries, + embed_tokens_list, + no_encoder_attn=False, + output_projection=None, + ): + + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionaries, + embed_tokens_list, + no_encoder_attn=no_encoder_attn, + output_projection=output_projection, + use_rel_pos_enc=getattr(args, "use_rel_pos_enc", False), + ) + + def build_output_projection(self, args, dictionaries, embed_tokens_list): + super().build_output_projection( + TransformerConfig.from_namespace(args), dictionaries, embed_tokens_list + ) + + def build_decoder_layer(self, args, no_encoder_attn=False): + return super().build_decoder_layer( + TransformerConfig.from_namespace(args), no_encoder_attn=no_encoder_attn + ) + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + modal_idx=0, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + modal_idx=modal_idx, + ) + return x, None diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/modules/relative_pos_enc.py b/SpeechT5/YiTrans/yitrans_iwslt22/modules/relative_pos_enc.py new file mode 100644 index 0000000000000000000000000000000000000000..2a073ebf2893e9e9b092aa520bdaf927e9388c2b --- /dev/null +++ 
b/SpeechT5/YiTrans/yitrans_iwslt22/modules/relative_pos_enc.py @@ -0,0 +1,35 @@ +# -------------------------------------------------------- +# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/pytorch/fairseq +# -------------------------------------------------------- + +import torch + +class RelativePositionalEncoding(torch.nn.Module): + def __init__(self, d_model, maxlen=1000, embed_v=False): + super(RelativePositionalEncoding, self).__init__() + + self.d_model = d_model + self.maxlen = maxlen + self.pe_k = torch.nn.Embedding(2*maxlen, d_model) + if embed_v: + self.pe_v = torch.nn.Embedding(2*maxlen, d_model) + self.embed_v = embed_v + + + def forward(self, pos_seq, incremental_state=None): + pos_seq[pos_seq < -self.maxlen] = -self.maxlen + pos_seq[pos_seq >= self.maxlen] = self.maxlen - 1 + pos_seq = pos_seq + self.maxlen + + if incremental_state is not None: + pos_seq = pos_seq[-1:] + + if self.embed_v: + return self.pe_k(pos_seq), self.pe_v(pos_seq) + else: + return self.pe_k(pos_seq), None diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/modules/transformer_decoder.py b/SpeechT5/YiTrans/yitrans_iwslt22/modules/transformer_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..29b9f30fc0f026259c8f0ea277e4d60f9d70568d --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/modules/transformer_decoder.py @@ -0,0 +1,523 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/transformer/transformer_decoder.py +""" + +import math +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +from fairseq import utils +from fairseq.distributed import fsdp_wrap +from fairseq.models import FairseqIncrementalDecoder +from fairseq.models.transformer import TransformerConfig +from fairseq.modules import ( + AdaptiveSoftmax, + BaseLayer, + FairseqDropout, + LayerDropModuleList, + LayerNorm, + PositionalEmbedding, + SinusoidalPositionalEmbedding, +) +from fairseq.modules.checkpoint_activations import checkpoint_wrapper +from fairseq.modules.quant_noise import quant_noise as apply_quant_noise_ +from torch import Tensor + +import yitrans_iwslt22.modules.transformer_decoder_layer as transformer_layer +from yitrans_iwslt22.modules.relative_pos_enc import RelativePositionalEncoding + +# rewrite name for backward compatibility in `make_generation_fast_` +def module_name_fordropout(module_name: str) -> str: + if module_name == "TransformerDecoderBase": + return "TransformerDecoder" + else: + return module_name + + +class TransformerDecoderBase(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *cfg.decoder.layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. 
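+
+    This copy of the fairseq decoder adds optional relative positional
+    encoding (``use_rel_pos_enc``), which passes a per-offset bias into the
+    self-attention of every decoder layer.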
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). + """ + + def __init__( + self, + cfg, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + use_rel_pos_enc=False, + ): + self.cfg = cfg + super().__init__(dictionary) + self.register_buffer("version", torch.Tensor([3])) + self._future_mask = torch.empty(0) + + self.dropout_module = FairseqDropout( + cfg.dropout, module_name=module_name_fordropout(self.__class__.__name__) + ) + self.decoder_layerdrop = cfg.decoder.layerdrop + self.share_input_output_embed = cfg.share_decoder_input_output_embed + + input_embed_dim = embed_tokens.embedding_dim + embed_dim = cfg.decoder.embed_dim + self.embed_dim = embed_dim + self.output_embed_dim = cfg.decoder.output_dim + + self.padding_idx = embed_tokens.padding_idx + self.max_target_positions = cfg.max_target_positions + + self.embed_tokens = embed_tokens + + self.embed_scale = 1.0 if cfg.no_scale_embedding else math.sqrt(embed_dim) + + if not cfg.adaptive_input and cfg.quant_noise.pq > 0: + self.quant_noise = apply_quant_noise_( + nn.Linear(embed_dim, embed_dim, bias=False), + cfg.quant_noise.pq, + cfg.quant_noise.pq_block_size, + ) + else: + self.quant_noise = None + + self.project_in_dim = ( + Linear(input_embed_dim, embed_dim, bias=False) + if embed_dim != input_embed_dim + else None + ) + self.embed_positions = ( + PositionalEmbedding( + self.max_target_positions, + embed_dim, + self.padding_idx, + learned=cfg.decoder.learned_pos, + ) + if not cfg.no_token_positional_embeddings + else None + ) + if cfg.layernorm_embedding: + self.layernorm_embedding = LayerNorm(embed_dim, export=cfg.export) + else: + self.layernorm_embedding = None + + self.cross_self_attention = cfg.cross_self_attention + + if self.decoder_layerdrop > 0.0: + self.layers = LayerDropModuleList(p=self.decoder_layerdrop) + else: + self.layers = nn.ModuleList([]) + self.use_rel_pos_enc = use_rel_pos_enc + self.layers.extend( + [ + self.build_decoder_layer(cfg, no_encoder_attn) + for _ in range(cfg.decoder.layers) + ] + ) + self.num_layers = len(self.layers) + + if cfg.decoder.normalize_before and not cfg.no_decoder_final_norm: + self.layer_norm = LayerNorm(embed_dim, export=cfg.export) + else: + self.layer_norm = None + + self.project_out_dim = ( + Linear(embed_dim, self.output_embed_dim, bias=False) + if embed_dim != self.output_embed_dim and not cfg.tie_adaptive_weights + else None + ) + + self.adaptive_softmax = None + self.output_projection = output_projection + if self.output_projection is None: + self.build_output_projection(cfg, dictionary, embed_tokens) + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(embed_dim // cfg.decoder.attention_heads, 24) + + def build_output_projection(self, cfg, dictionary, embed_tokens): + if cfg.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + utils.eval_str_list(cfg.adaptive_softmax_cutoff, type=int), + dropout=cfg.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if cfg.tie_adaptive_weights else None, + factor=cfg.adaptive_softmax_factor, + tie_proj=cfg.tie_adaptive_proj, + ) + elif self.share_input_output_embed: + self.output_projection = nn.Linear( + self.embed_tokens.weight.shape[1], + self.embed_tokens.weight.shape[0], + bias=False, + ) + 
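+            # Weight tying: reuse the decoder input embedding matrix as the
+            # output projection.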
self.output_projection.weight = self.embed_tokens.weight + else: + self.output_projection = nn.Linear( + self.output_embed_dim, len(dictionary), bias=False + ) + nn.init.normal_( + self.output_projection.weight, mean=0, std=self.output_embed_dim ** -0.5 + ) + num_base_layers = cfg.base_layers + for i in range(num_base_layers): + self.layers.insert( + ((i + 1) * cfg.decoder.layers) // (num_base_layers + 1), + BaseLayer(cfg), + ) + + def build_decoder_layer(self, cfg, no_encoder_attn=False): + layer = transformer_layer.TransformerDecoderLayerBase(cfg, no_encoder_attn, has_relative_attention_bias=self.use_rel_pos_enc) + checkpoint = cfg.checkpoint_activations + if checkpoint: + offload_to_cpu = cfg.offload_activations + layer = checkpoint_wrapper(layer, offload_to_cpu=offload_to_cpu) + # if we are checkpointing, enforce that FSDP always wraps the + # checkpointed layer, regardless of layer size + min_params_to_wrap = cfg.min_params_to_wrap if not checkpoint else 0 + layer = fsdp_wrap(layer, min_num_params=min_params_to_wrap) + return layer + + def forward( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + features_only: bool = False, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + src_lengths: Optional[Any] = None, + return_all_hiddens: bool = False, + ): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (optional): output from the encoder, used for + encoder-side attention, should be of size T x B x C + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + features_only (bool, optional): only return features without + applying output layer (default: False). + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + + x, extra = self.extract_features( + prev_output_tokens, + encoder_out=encoder_out, + incremental_state=incremental_state, + full_context_alignment=full_context_alignment, + alignment_layer=alignment_layer, + alignment_heads=alignment_heads, + ) + + if not features_only: + x = self.output_layer(x) + return x, extra + + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + return self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + + """ + A scriptable subclass of this class has an extract_features method and calls + super().extract_features, but super() is not supported in torchscript. A copy of + this function is made to be used in the subclass instead. 
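+    (See ``TransformerDecoderScriptable`` at the end of this file, whose
+    ``extract_features`` calls ``extract_features_scriptable`` directly for
+    exactly this reason.)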
+ """ + + def extract_features_scriptable( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]], + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + """ + Similar to *forward* but only return features. + + Includes several features from "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + full_context_alignment (bool, optional): don't apply + auto-regressive mask to self-attention (default: False). + alignment_layer (int, optional): return mean alignment over + heads at this layer (default: last layer). + alignment_heads (int, optional): only average alignment over + this many heads (default: all heads). + + Returns: + tuple: + - the decoder's features of shape `(batch, tgt_len, embed_dim)` + - a dictionary with any model-specific outputs + """ + bs, slen = prev_output_tokens.size() + if alignment_layer is None: + alignment_layer = self.num_layers - 1 + + enc: Optional[Tensor] = None + padding_mask: Optional[Tensor] = None + if encoder_out is not None and len(encoder_out["encoder_out"]) > 0: + enc = encoder_out["encoder_out"][0] + assert ( + enc.size()[1] == bs + ), f"Expected enc.shape == (t, {bs}, c) got {enc.shape}" + if encoder_out is not None and len(encoder_out["encoder_padding_mask"]) > 0: + padding_mask = encoder_out["encoder_padding_mask"][0] + + # embed positions + positions = None + if self.embed_positions is not None: + positions = self.embed_positions( + prev_output_tokens, incremental_state=incremental_state + ) + + if incremental_state is not None: + prev_output_tokens = prev_output_tokens[:, -1:] + if positions is not None: + positions = positions[:, -1:] + + # embed tokens and positions + x = self.embed_scale * self.embed_tokens(prev_output_tokens) + + if self.quant_noise is not None: + x = self.quant_noise(x) + + if self.project_in_dim is not None: + x = self.project_in_dim(x) + + if positions is not None: + x += positions + + if self.layernorm_embedding is not None: + x = self.layernorm_embedding(x) + + x = self.dropout_module(x) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + if self.use_rel_pos_enc: + pos_seq = torch.arange(0, slen).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, _ = self.pos_emb(pos_seq, incremental_state) + else: + pos_k = None + + self_attn_padding_mask: Optional[Tensor] = None + if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any(): + self_attn_padding_mask = prev_output_tokens.eq(self.padding_idx) + + # decoder layers + attn: Optional[Tensor] = None + inner_states: List[Optional[Tensor]] = [x] + for idx, layer in enumerate(self.layers): + if incremental_state is None and not full_context_alignment: + self_attn_mask = self.buffered_future_mask(x) + else: + self_attn_mask = None + + x, layer_attn, _ = layer( + x, + enc, + padding_mask, + incremental_state, + self_attn_mask=self_attn_mask, + self_attn_padding_mask=self_attn_padding_mask, + need_attn=bool((idx == alignment_layer)), + need_head_weights=bool((idx == alignment_layer)), + pos_bias=pos_k, + ) + inner_states.append(x) + if layer_attn is not None and idx == alignment_layer: + attn = layer_attn.float().to(x) + + if attn is not None: + if alignment_heads is not None: + attn = attn[:alignment_heads] + + # average probabilities over heads + attn = attn.mean(dim=0) + + if self.layer_norm is not None: + x = 
self.layer_norm(x) + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + + if self.project_out_dim is not None: + x = self.project_out_dim(x) + + return x, {"attn": [attn], "inner_states": inner_states} + + def output_layer(self, features): + """Project features to the vocabulary size.""" + if self.adaptive_softmax is None: + # project back to size of vocabulary + return self.output_projection(features) + else: + return features + + def max_positions(self): + """Maximum output length supported by the decoder.""" + if self.embed_positions is None: + return self.max_target_positions + return min(self.max_target_positions, self.embed_positions.max_positions) + + def buffered_future_mask(self, tensor): + dim = tensor.size(0) + # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround. + if ( + self._future_mask.size(0) == 0 + or (not self._future_mask.device == tensor.device) + or self._future_mask.size(0) < dim + ): + self._future_mask = torch.triu( + utils.fill_with_neg_inf(torch.zeros([dim, dim])), 1 + ) + self._future_mask = self._future_mask.to(tensor) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = "{}.embed_positions.weights".format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict[ + "{}.embed_positions._float_tensor".format(name) + ] = torch.FloatTensor(1) + + if f"{name}.output_projection.weight" not in state_dict: + if self.share_input_output_embed: + embed_out_key = f"{name}.embed_tokens.weight" + else: + embed_out_key = f"{name}.embed_out" + if embed_out_key in state_dict: + state_dict[f"{name}.output_projection.weight"] = state_dict[ + embed_out_key + ] + if not self.share_input_output_embed: + del state_dict[embed_out_key] + + for i in range(self.num_layers): + # update layer norms + layer_norm_map = { + "0": "self_attn_layer_norm", + "1": "encoder_attn_layer_norm", + "2": "final_layer_norm", + } + for old, new in layer_norm_map.items(): + for m in ("weight", "bias"): + k = "{}.layers.{}.layer_norms.{}.{}".format(name, i, old, m) + if k in state_dict: + state_dict[ + "{}.layers.{}.{}.{}".format(name, i, new, m) + ] = state_dict[k] + del state_dict[k] + + version_key = "{}.version".format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +def Linear(in_features, out_features, bias=True): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) + if bias: + nn.init.constant_(m.bias, 0.0) + return m + + +class TransformerDecoder(TransformerDecoderBase): + def __init__( + self, + args, + dictionary, + embed_tokens, + no_encoder_attn=False, + output_projection=None, + ): + self.args = args + super().__init__( + TransformerConfig.from_namespace(args), + dictionary, + embed_tokens, + no_encoder_attn=no_encoder_attn, + output_projection=output_projection, + use_rel_pos_enc=getattr(args, "use_rel_pos_enc", False), + ) + + def build_output_projection(self, args, dictionary, embed_tokens): + super().build_output_projection( + TransformerConfig.from_namespace(args), dictionary, embed_tokens + ) + + def build_decoder_layer(self, args, no_encoder_attn=False): + return 
super().build_decoder_layer( + TransformerConfig.from_namespace(args), no_encoder_attn=no_encoder_attn + ) + +class TransformerDecoderScriptable(TransformerDecoder): + def extract_features( + self, + prev_output_tokens, + encoder_out: Optional[Dict[str, List[Tensor]]] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + full_context_alignment: bool = False, + alignment_layer: Optional[int] = None, + alignment_heads: Optional[int] = None, + ): + # call scriptable method from parent class + x, _ = self.extract_features_scriptable( + prev_output_tokens, + encoder_out, + incremental_state, + full_context_alignment, + alignment_layer, + alignment_heads, + ) + return x, None diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/modules/transformer_decoder_layer.py b/SpeechT5/YiTrans/yitrans_iwslt22/modules/transformer_decoder_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..d5397b850a1cbec7f4e092a813cdb79b9c909c9f --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/modules/transformer_decoder_layer.py @@ -0,0 +1,219 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- +""" + Modified from https://github.com/facebookresearch/fairseq/blob/main/fairseq/modules/transformer_layer.py + https://github.com/microsoft/SpeechT5/blob/main/Speech2C/speech2c/models/modules/transformer_decoder_layer.py +""" + +from typing import Dict, List, Optional + +import torch +from torch import Tensor +from fairseq.modules.transformer_layer import TransformerDecoderLayerBase as FairseqTransformerDecoderLayerBase +from fairseq.modules import LayerNorm + +from yitrans_iwslt22.modules.multihead_attention import MultiheadAttention + + +class TransformerDecoderLayerBase(FairseqTransformerDecoderLayerBase): + """Decoder layer block. + + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *cfg.decoder.normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
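+        add_bias_kv (bool, optional): add a learnable bias to the key and
+            value sequences in self-attention (default: False).
+        add_zero_attn (bool, optional): append a zero vector to the key and
+            value sequences in self-attention (default: False).
+        has_relative_attention_bias (bool, optional): if True, create a
+            LayerNorm (``norm_k``) over the per-head dimension and apply it to
+            the relative position bias (``pos_bias``) before self-attention
+            (default: False).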
+ """ + + def __init__( + self, cfg, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False, has_relative_attention_bias=False + ): + super().__init__( + cfg, + no_encoder_attn, + add_bias_kv, + add_zero_attn, + ) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embed_dim // cfg.decoder.attention_heads) + + def build_self_attention( + self, embed_dim, cfg, add_bias_kv=False, add_zero_attn=False + ): + return MultiheadAttention( + embed_dim, + cfg.decoder.attention_heads, + dropout=cfg.attention_dropout, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=not cfg.cross_self_attention, + q_noise=self.quant_noise, + qn_block_size=self.quant_noise_block_size, + ) + + def forward( + self, + x, + encoder_out: Optional[torch.Tensor] = None, + encoder_padding_mask: Optional[torch.Tensor] = None, + incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None, + prev_self_attn_state: Optional[List[torch.Tensor]] = None, + prev_attn_state: Optional[List[torch.Tensor]] = None, + self_attn_mask: Optional[torch.Tensor] = None, + self_attn_padding_mask: Optional[torch.Tensor] = None, + need_attn: bool = False, + need_head_weights: bool = False, + pos_bias=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor, optional): binary + ByteTensor of shape `(batch, src_len)` where padding + elements are indicated by ``1``. + need_attn (bool, optional): return attention weights + need_head_weights (bool, optional): return attention weights + for each head (default: return average over heads). + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if need_head_weights: + need_attn = True + + residual = x + if self.normalize_before: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + if prev_self_attn_state is not None: + prev_key, prev_value = prev_self_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_self_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_self_attn_state[2] + assert incremental_state is not None + self.self_attn._set_input_buffer(incremental_state, saved_state) + _self_attn_input_buffer = self.self_attn._get_input_buffer(incremental_state) + if self.cross_self_attention and not ( + incremental_state is not None + and _self_attn_input_buffer is not None + and "prev_key" in _self_attn_input_buffer + ): + if self_attn_mask is not None: + assert encoder_out is not None + self_attn_mask = torch.cat( + (x.new_zeros(x.size(0), encoder_out.size(0)), self_attn_mask), dim=1 + ) + if self_attn_padding_mask is not None: + if encoder_padding_mask is None: + assert encoder_out is not None + encoder_padding_mask = self_attn_padding_mask.new_zeros( + encoder_out.size(1), encoder_out.size(0) + ) + self_attn_padding_mask = torch.cat( + (encoder_padding_mask, self_attn_padding_mask), dim=1 + ) + assert encoder_out is not None + y = torch.cat((encoder_out, x), dim=0) + else: + y = x + + x, attn = self.self_attn( + query=x, + key=y, + value=y, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + if self.c_attn is not None: + tgt_len, bsz = x.size(0), x.size(1) + x = x.view(tgt_len, bsz, self.nh, self.head_dim) + x = torch.einsum("tbhd,h->tbhd", x, self.c_attn) + x = x.reshape(tgt_len, bsz, self.embed_dim) + if 
self.attn_ln is not None: + x = self.attn_ln(x) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.self_attn_layer_norm(x) + + if self.encoder_attn is not None and encoder_out is not None: + residual = x + if self.normalize_before: + x = self.encoder_attn_layer_norm(x) + if prev_attn_state is not None: + prev_key, prev_value = prev_attn_state[:2] + saved_state: Dict[str, Optional[Tensor]] = { + "prev_key": prev_key, + "prev_value": prev_value, + } + if len(prev_attn_state) >= 3: + saved_state["prev_key_padding_mask"] = prev_attn_state[2] + assert incremental_state is not None + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=need_attn or (not self.training and self.need_attn), + need_head_weights=need_head_weights, + ) + x = self.dropout_module(x) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.encoder_attn_layer_norm(x) + + residual = x + if self.normalize_before: + x = self.final_layer_norm(x) + + x = self.activation_fn(self.fc1(x)) + x = self.activation_dropout_module(x) + if self.ffn_layernorm is not None: + x = self.ffn_layernorm(x) + x = self.fc2(x) + x = self.dropout_module(x) + if self.w_resid is not None: + residual = torch.mul(self.w_resid, residual) + x = self.residual_connection(x, residual) + if not self.normalize_before: + x = self.final_layer_norm(x) + if self.onnx_trace and incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + assert saved_state is not None + if self_attn_padding_mask is not None: + self_attn_state = [ + saved_state["prev_key"], + saved_state["prev_value"], + saved_state["prev_key_padding_mask"], + ] + else: + self_attn_state = [saved_state["prev_key"], saved_state["prev_value"]] + return x, attn, self_attn_state + return x, attn, None + + def make_generation_fast_(self, need_attn: bool = False, **kwargs): + self.need_attn = need_attn diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/modules/w2v_encoder.py b/SpeechT5/YiTrans/yitrans_iwslt22/modules/w2v_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..7d59a7bfcb5a5b1d02b685c9cfb3c5c2f5cbfa80 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/modules/w2v_encoder.py @@ -0,0 +1,283 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- + +""" + wav2vec encoder adding relitive position bias, modified from + https://github.com/microsoft/SpeechT5/blob/main/Speech2C/speech2c/models/modules/transformer_encoder.py + https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/wav2vec/wav2vec2.py +""" + +import math +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from fairseq import utils +from fairseq.dataclass import ChoiceEnum +from fairseq.modules import ( + LayerNorm, + SamePad, +) +from fairseq.modules.checkpoint_activations import 
checkpoint_wrapper +from fairseq.modules.transformer_sentence_encoder import init_bert_params +from fairseq.utils import index_put +from fairseq.distributed import fsdp_wrap +from fairseq.models.wav2vec.utils import pad_to_multiple + +## reload multi-head attition with rel-pos-bias +from fairseq.models.wav2vec.wav2vec2 import TransformerEncoder as W2vTransformerEncoder +from yitrans_iwslt22.modules.relative_pos_enc import RelativePositionalEncoding +from yitrans_iwslt22.modules.multihead_attention import MultiheadAttention + +EXTRACTOR_MODE_CHOICES = ChoiceEnum(["default", "layer_norm"]) +MASKING_DISTRIBUTION_CHOICES = ChoiceEnum(["static", "uniform", "normal", "poisson"]) + + +class TransformerEncoder(W2vTransformerEncoder): + def __init__(self, args): + super().__init__(args) + + self.dropout = args.dropout + self.embedding_dim = args.encoder_embed_dim + self.required_seq_len_multiple = args.required_seq_len_multiple + self.use_rel_pos_enc = getattr(args, "use_rel_pos_enc", False) + + self.pos_conv = nn.Conv1d( + self.embedding_dim, + self.embedding_dim, + kernel_size=args.conv_pos, + padding=args.conv_pos // 2, + groups=args.conv_pos_groups, + ) + dropout = 0 + std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * self.embedding_dim)) + nn.init.normal_(self.pos_conv.weight, mean=0, std=std) + nn.init.constant_(self.pos_conv.bias, 0) + + self.pos_conv = nn.utils.weight_norm(self.pos_conv, name="weight", dim=2) + self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU()) + + layers = [] + for _ in range(args.encoder_layers): + layer = TransformerSentenceEncoderLayer( + embedding_dim=self.embedding_dim, + ffn_embedding_dim=args.encoder_ffn_embed_dim, + num_attention_heads=args.encoder_attention_heads, + dropout=self.dropout, + attention_dropout=args.attention_dropout, + activation_dropout=args.activation_dropout, + activation_fn=args.activation_fn, + layer_norm_first=args.layer_norm_first, + has_relative_attention_bias=self.use_rel_pos_enc, + ) + if args.checkpoint_activations: + layer = fsdp_wrap(layer) + layer = checkpoint_wrapper(layer) + layers.append(layer) + self.layers = nn.ModuleList(layers) + + self.layer_norm_first = args.layer_norm_first + self.layer_norm = LayerNorm(self.embedding_dim) + self.layerdrop = args.encoder_layerdrop + if self.use_rel_pos_enc: + self.pos_emb = RelativePositionalEncoding(args.encoder_embed_dim // args.encoder_attention_heads, 160) + + + self.apply(init_bert_params) + + def forward(self, x, padding_mask=None, layer=None): + x, layer_results = self.extract_features(x, padding_mask, layer) + + if self.layer_norm_first and layer is None: + x = self.layer_norm(x) + + return x, layer_results + + def extract_features(self, x, padding_mask=None, tgt_layer=None): + + if padding_mask is not None: + x = index_put(x, padding_mask, 0) + + x_conv = self.pos_conv(x.transpose(1, 2)) + x_conv = x_conv.transpose(1, 2) + x = x + x_conv + + if not self.layer_norm_first: + x = self.layer_norm(x) + + # pad to the sequence length dimension + x, pad_length = pad_to_multiple( + x, self.required_seq_len_multiple, dim=-2, value=0 + ) + if pad_length > 0 and padding_mask is None: + padding_mask = x.new_zeros((x.size(0), x.size(1)), dtype=torch.bool) + padding_mask[:, -pad_length:] = True + else: + padding_mask, _ = pad_to_multiple( + padding_mask, self.required_seq_len_multiple, dim=-1, value=True + ) + x = F.dropout(x, p=self.dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + if self.use_rel_pos_enc: + x_len = 
x.shape[0] + pos_seq = torch.arange(0, x_len).long().to(x.device) + pos_seq = pos_seq[:, None] - pos_seq[None, :] + pos_k, pos_v = self.pos_emb(pos_seq) + else: + pos_k = None + + layer_results = [] + r = None + for i, layer in enumerate(self.layers): + dropout_probability = np.random.random() + if not self.training or (dropout_probability > self.layerdrop): + x, z = layer(x, self_attn_padding_mask=padding_mask, need_weights=False, pos_bias=pos_k) + if tgt_layer is not None: + # unpad if needed + if pad_length > 0: + layer_results.append( + ( + x[:-pad_length], + z[:, :-pad_length, :-pad_length] + if z is not None + else z, + ) + ) + else: + layer_results.append((x, z)) + if i == tgt_layer: + r = x + break + + if r is not None: + x = r + + # T x B x C -> B x T x C + x = x.transpose(0, 1) + # undo paddding + if pad_length > 0: + x = x[:, :-pad_length] + + return x, layer_results + + +class TransformerSentenceEncoderLayer(nn.Module): + """ + Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained + models. + """ + + def __init__( + self, + embedding_dim: float = 768, + ffn_embedding_dim: float = 3072, + num_attention_heads: float = 8, + dropout: float = 0.1, + attention_dropout: float = 0.1, + activation_dropout: float = 0.1, + activation_fn: str = "relu", + layer_norm_first: bool = False, + has_relative_attention_bias: bool = False, + ) -> None: + + super().__init__() + # Initialize parameters + self.embedding_dim = embedding_dim + self.dropout = dropout + self.activation_dropout = activation_dropout + + # Initialize blocks + self.activation_fn = utils.get_activation_fn(activation_fn) + self.self_attn = MultiheadAttention( + self.embedding_dim, + num_attention_heads, + dropout=attention_dropout, + self_attention=True, + ) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(self.activation_dropout) + self.dropout3 = nn.Dropout(dropout) + + self.layer_norm_first = layer_norm_first + + # layer norm associated with the self attention layer + self.self_attn_layer_norm = LayerNorm(self.embedding_dim) + self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim) + self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim) + + # layer norm associated with the position wise feed-forward NN + self.final_layer_norm = LayerNorm(self.embedding_dim) + + if has_relative_attention_bias: + self.norm_k = LayerNorm(self.embedding_dim//num_attention_heads) + + def forward( + self, + x: torch.Tensor, + self_attn_mask: torch.Tensor = None, + self_attn_padding_mask: torch.Tensor = None, + need_weights: bool = False, + att_args=None, + pos_bias=None, + ): + """ + LayerNorm is applied either before or after the self-attention/ffn + modules similar to the original Transformer imlementation. 
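+
+        Args:
+            x (Tensor): input of shape `(seq_len, batch, embed_dim)`
+            self_attn_mask (Tensor, optional): mask added to the
+                self-attention scores
+            self_attn_padding_mask (Tensor, optional): mask of shape
+                `(batch, seq_len)` marking padded time steps
+            pos_bias (Tensor, optional): relative position embeddings built in
+                ``TransformerEncoder.extract_features`` from pairwise offsets
+                (e.g. for a length-3 sequence the offsets are
+                [[0, -1, -2], [1, 0, -1], [2, 1, 0]]); in the pre-norm branch
+                they are normalized by ``norm_k`` before being passed to
+                self-attention as ``position_bias``
+
+        Returns:
+            tuple of the encoded output `(seq_len, batch, embed_dim)` and the
+            attention weights returned by self-attention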
+ """ + residual = x + + if self.layer_norm_first: + x = self.self_attn_layer_norm(x) + if pos_bias is not None: + pos_bias = self.norm_k(pos_bias) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + attn_mask=self_attn_mask, + position_bias=pos_bias, + ) + x = self.dropout1(x) + x = residual + x + + residual = x + x = self.final_layer_norm(x) + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + else: + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + position_bias=pos_bias, + ) + + x = self.dropout1(x) + x = residual + x + + x = self.self_attn_layer_norm(x) + + residual = x + x = self.activation_fn(self.fc1(x)) + x = self.dropout2(x) + x = self.fc2(x) + x = self.dropout3(x) + x = residual + x + x = self.final_layer_norm(x) + + return x, attn diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/sequence_generator.py b/SpeechT5/YiTrans/yitrans_iwslt22/sequence_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..5f80669471b5837a14dd3451e546ee273a74ac5c --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/sequence_generator.py @@ -0,0 +1,999 @@ +""" + Modified from + https://github.com/facebookresearch/fairseq/blob/main/fairseq/sequence_generator.py + +""" + +import math +from typing import Dict, List, Optional +import sys + +import torch +import torch.nn as nn +from fairseq import search, utils +from fairseq.data import data_utils +from fairseq.models import FairseqIncrementalDecoder +from torch import Tensor +from fairseq.ngram_repeat_block import NGramRepeatBlock +import numpy + + +class SequenceGenerator(nn.Module): + def __init__( + self, + models, + tgt_dict, + beam_size=1, + max_len_a=0, + max_len_b=200, + max_len=0, + min_len=1, + normalize_scores=True, + len_penalty=1.0, + unk_penalty=0.0, + temperature=1.0, + match_source_len=False, + no_repeat_ngram_size=0, + search_strategy=None, + eos=None, + bos=None, + symbols_to_strip_from_output=None, + lm_model=None, + lm_weight=1.0, + ctc_weight=0.0, + ): + """Generates translations of a given source sentence. 
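+
+        Compared with the stock fairseq generator, this variant supports a
+        separate beginning-of-sentence/language token (*bos*), never selects
+        the blank symbol during search, and can optionally fuse an external
+        language model into the beam scores via *lm_model* / *lm_weight*.
+
+        Example (illustrative sketch only; ``models``, ``tgt_dict`` and
+        ``sample`` are assumed to come from a loaded fairseq checkpoint and
+        task)::
+
+            generator = SequenceGenerator(models, tgt_dict, beam_size=5,
+                                          max_len_a=1, max_len_b=200)
+            hypos = generator.generate(models, sample)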
+ + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models, + currently support fairseq.models.TransformerModel for scripting + beam_size (int, optional): beam width (default: 1) + max_len_a/b (int, optional): generate sequences of maximum length + ax + b, where x is the source length + max_len (int, optional): the maximum length of the generated output + (not including end-of-sentence) + min_len (int, optional): the minimum length of the generated output + (not including end-of-sentence) + normalize_scores (bool, optional): normalize scores by the length + of the output (default: True) + len_penalty (float, optional): length penalty, where <1.0 favors + shorter, >1.0 favors longer sentences (default: 1.0) + unk_penalty (float, optional): unknown word penalty, where <0 + produces more unks, >0 produces fewer (default: 0.0) + temperature (float, optional): temperature, where values + >1.0 produce more uniform samples and values <1.0 produce + sharper samples (default: 1.0) + match_source_len (bool, optional): outputs should match the source + length (default: False) + """ + super().__init__() + if isinstance(models, EnsembleModel): + self.model = models + else: + self.model = EnsembleModel(models) + self.tgt_dict = tgt_dict + self.pad = tgt_dict.pad() + self.unk = tgt_dict.unk() + self.eos = tgt_dict.eos() if eos is None else eos + self.bos = self.eos if bos is None else bos + self.blank = self.tgt_dict.index("") + self.symbols_to_strip_from_output = ( + symbols_to_strip_from_output.union({self.eos}) + if symbols_to_strip_from_output is not None + else {self.eos} + ) + self.vocab_size = len(tgt_dict) + self.beam_size = beam_size + # the max beam size is the dictionary size - 1, since we never select pad + self.beam_size = min(beam_size, self.vocab_size - 1) + self.max_len_a = max_len_a + self.max_len_b = max_len_b + self.min_len = min_len + self.max_len = max_len or self.model.max_decoder_positions() + + self.normalize_scores = normalize_scores + self.len_penalty = len_penalty + self.unk_penalty = unk_penalty + self.temperature = temperature + self.match_source_len = match_source_len + + if no_repeat_ngram_size > 0: + self.repeat_ngram_blocker = NGramRepeatBlock(no_repeat_ngram_size) + else: + self.repeat_ngram_blocker = None + + assert temperature > 0, "--temperature must be greater than 0" + + self.search = ( + search.BeamSearch(tgt_dict) if search_strategy is None else search_strategy + ) + # We only need to set src_lengths in LengthConstrainedBeamSearch. + # As a module attribute, setting it would break in multithread + # settings when the model is shared. + self.should_set_src_lengths = ( + hasattr(self.search, "needs_src_lengths") and self.search.needs_src_lengths + ) + + self.model.eval() + + self.lm_model = lm_model + self.lm_weight = lm_weight + self.ctc_weight = ctc_weight + if self.lm_model is not None: + self.lm_model.eval() + + def cuda(self): + self.model.cuda() + return self + + @torch.no_grad() + def forward( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + """Generate a batch of translations. 
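+
+        Decoding runs under ``torch.no_grad()`` and simply delegates to
+        :func:`_generate`.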
+ + Args: + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, prefix_tokens, bos_token=bos_token) + + # TODO(myleott): unused, deprecate after pytorch-translate migration + def generate_batched_itr(self, data_itr, beam_size=None, cuda=False, timer=None): + """Iterate over a batched dataset and yield individual translations. + Args: + cuda (bool, optional): use GPU for generation + timer (StopwatchMeter, optional): time generations + """ + for sample in data_itr: + s = utils.move_to_cuda(sample) if cuda else sample + if "net_input" not in s: + continue + input = s["net_input"] + # model.forward normally channels prev_output_tokens into the decoder + # separately, but SequenceGenerator directly calls model.encoder + encoder_input = { + k: v for k, v in input.items() if k != "prev_output_tokens" + } + if timer is not None: + timer.start() + with torch.no_grad(): + hypos = self.generate(encoder_input) + if timer is not None: + timer.stop(sum(len(h[0]["tokens"]) for h in hypos)) + for i, id in enumerate(s["id"].data): + # remove padding + src = utils.strip_pad(input["src_tokens"].data[i, :], self.pad) + ref = ( + utils.strip_pad(s["target"].data[i, :], self.pad) + if s["target"] is not None + else None + ) + yield id, src, ref, hypos[i] + + @torch.no_grad() + def generate( + self, models, sample: Dict[str, Dict[str, Tensor]], **kwargs + ) -> List[List[Dict[str, Tensor]]]: + """Generate translations. Match the api of other fairseq generators. + + Args: + models (List[~fairseq.models.FairseqModel]): ensemble of models + sample (dict): batch + prefix_tokens (torch.LongTensor, optional): force decoder to begin + with these tokens + constraints (torch.LongTensor, optional): force decoder to include + the list of constraints + bos_token (int, optional): beginning of sentence token + (default: self.eos) + """ + return self._generate(sample, **kwargs) + + def _generate( + self, + sample: Dict[str, Dict[str, Tensor]], + prefix_tokens: Optional[Tensor] = None, + constraints: Optional[Tensor] = None, + bos_token: Optional[int] = None, + ): + incremental_states = torch.jit.annotate( + List[Dict[str, Dict[str, Optional[Tensor]]]], + [ + torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {}) + for i in range(self.model.models_size) + ], + ) + net_input = sample["net_input"] + + if "src_tokens" in net_input: + src_tokens = net_input["src_tokens"] + # length of the source text being the character length except EndOfSentence and pad + src_lengths = ( + (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1) + ) + elif "source" in net_input: + src_tokens = net_input["source"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + elif "features" in net_input: + src_tokens = net_input["features"] + src_lengths = ( + net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1) + if net_input["padding_mask"] is not None + else torch.tensor(src_tokens.size(-1)).to(src_tokens) + ) + else: + raise Exception( + "expected src_tokens or source in net input. input keys: " + + str(net_input.keys()) + ) + + # bsz: total number of sentences in beam + # Note that src_tokens may have more than 2 dimensions (i.e. 
audio features) + bsz, src_len = src_tokens.size()[:2] + beam_size = self.beam_size + + if constraints is not None and not self.search.supports_constraints: + raise NotImplementedError( + "Target-side constraints were provided, but search method doesn't support them" + ) + + # Initialize constraints, when active + self.search.init_constraints(constraints, beam_size) + + max_len: int = -1 + if self.match_source_len: + max_len = src_lengths.max().item() + else: + max_len = min( + int(self.max_len_a * src_len + self.max_len_b), + self.max_len - 1, + ) + assert ( + self.min_len <= max_len + ), "min_len cannot be larger than max_len, please adjust these!" + # compute the encoder output for each beam + with torch.autograd.profiler.record_function("EnsembleModel: forward_encoder"): + encoder_outs = self.model.forward_encoder(net_input) + + dec_sos = sample["lang_idx"] if ("lang_idx" in sample and sample["lang_idx"] is not None) else (self.bos if bos_token is None else bos_token) + # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores + new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1) + new_order = new_order.to(src_tokens.device).long() + encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order) + # ensure encoder_outs is a List. + assert encoder_outs is not None + + # initialize buffers + scores = ( + torch.zeros(bsz * beam_size, max_len + 1).to(src_tokens).float() + ) # +1 for eos; pad is never chosen for scoring + tokens = ( + torch.zeros(bsz * beam_size, max_len + 2) + .to(src_tokens) + .long() + .fill_(self.pad) + ) # +2 for eos and pad + tokens[:, 0] = dec_sos + attn: Optional[Tensor] = None + + # A list that indicates candidates that should be ignored. + # For example, suppose we're sampling and have already finalized 2/5 + # samples. Then cands_to_ignore would mark 2 positions as being ignored, + # so that we only finalize the remaining 3 samples. 
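+        # For example, with bsz=2 and beam_size=3 this starts out as a 2x3
+        # all-False mask (`.eq(-1)` on a zero tensor is just a device- and
+        # dtype-safe way of building it).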
+ cands_to_ignore = ( + torch.zeros(bsz, beam_size).to(src_tokens).eq(-1) + ) # forward and backward-compatible False mask + + # list of completed sentences + finalized = torch.jit.annotate( + List[List[Dict[str, Tensor]]], + [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)], + ) # contains lists of dictionaries of infomation about the hypothesis being finalized at each step + + # a boolean array indicating if the sentence at the index is finished or not + finished = [False for i in range(bsz)] + num_remaining_sent = bsz # number of sentences remaining + + # number of candidate hypos per step + cand_size = 2 * beam_size # 2 x beam size in case half are EOS + + # offset arrays for converting between different indexing schemes + bbsz_offsets = ( + (torch.arange(0, bsz) * beam_size) + .unsqueeze(1) + .type_as(tokens) + .to(src_tokens.device) + ) + cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_tokens.device) + + reorder_state: Optional[Tensor] = None + batch_idxs: Optional[Tensor] = None + + original_batch_idxs: Optional[Tensor] = None + if "id" in sample and isinstance(sample["id"], Tensor): + original_batch_idxs = sample["id"] + else: + original_batch_idxs = torch.arange(0, bsz).type_as(tokens) + + for step in range(max_len + 1): # one extra step for EOS marker + # reorder decoder internal states based on the prev choice of beams + if reorder_state is not None: + if batch_idxs is not None: + # update beam indices to take into account removed sentences + corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as( + batch_idxs + ) + reorder_state.view(-1, beam_size).add_( + corr.unsqueeze(-1) * beam_size + ) + original_batch_idxs = original_batch_idxs[batch_idxs] + self.model.reorder_incremental_state(incremental_states, reorder_state) + encoder_outs = self.model.reorder_encoder_out( + encoder_outs, reorder_state + ) + with torch.autograd.profiler.record_function( + "EnsembleModel: forward_decoder" + ): + lprobs, avg_attn_scores = self.model.forward_decoder( + tokens[:, : step + 1], + encoder_outs, + incremental_states, + self.temperature, + ) + + if self.lm_model is not None: + lm_out = self.lm_model(tokens[:, : step + 1]) + probs = self.lm_model.get_normalized_probs( + lm_out, log_probs=True, sample=None + ) + probs = probs[:, -1, :] * self.lm_weight + lprobs += probs + # handle prefix tokens (possibly with different lengths) + if ( + prefix_tokens is not None + and step < prefix_tokens.size(1) + and step < max_len + ): + lprobs, tokens, scores = self._prefix_tokens( + step, lprobs, scores, tokens, prefix_tokens, beam_size + ) + elif step < self.min_len: + # minimum length constraint (does not apply if using prefix_tokens) + lprobs[:, self.eos] = -math.inf + + lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs) + + lprobs[:, self.pad] = -math.inf # never select pad + lprobs[:, self.unk] -= self.unk_penalty # apply unk penalty + lprobs[:, self.blank] = -math.inf # never select blank + if dec_sos != self.eos: + lprobs[:, dec_sos] = -math.inf # never select lang id + + # handle max length constraint + if step >= max_len: + lprobs[:, : self.eos] = -math.inf + lprobs[:, self.eos + 1 :] = -math.inf + + # Record attention scores, only support avg_attn_scores is a Tensor + if avg_attn_scores is not None: + if attn is None: + attn = torch.empty( + bsz * beam_size, avg_attn_scores.size(1), max_len + 2 + ).to(scores) + attn[:, :, step + 1].copy_(avg_attn_scores) + + scores = scores.type_as(lprobs) + eos_bbsz_idx = torch.empty(0).to( + tokens + ) # 
indices of hypothesis ending with eos (finished sentences) + eos_scores = torch.empty(0).to( + scores + ) # scores of hypothesis ending with eos (finished sentences) + + if self.should_set_src_lengths: + self.search.set_src_lengths(src_lengths) + + if self.repeat_ngram_blocker is not None: + lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz, beam_size, step) + + # Shape: (batch, cand_size) + cand_scores, cand_indices, cand_beams = self.search.step( + step, + lprobs.view(bsz, -1, self.vocab_size), + scores.view(bsz, beam_size, -1)[:, :, :step], + tokens[:, : step + 1], + original_batch_idxs, + ) + + # cand_bbsz_idx contains beam indices for the top candidate + # hypotheses, with a range of values: [0, bsz*beam_size), + # and dimensions: [bsz, cand_size] + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + + # finalize hypotheses that end in eos + # Shape of eos_mask: (batch size, beam size) + eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf) + eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask) + + # only consider eos when it's among the top beam_size indices + # Now we know what beam item(s) to finish + # Shape: 1d list of absolute-numbered + eos_bbsz_idx = torch.masked_select( + cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents: List[int] = [] + if eos_bbsz_idx.numel() > 0: + eos_scores = torch.masked_select( + cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size] + ) + + finalized_sents = self.finalize_hypos( + step, + eos_bbsz_idx, + eos_scores, + tokens, + scores, + finalized, + finished, + beam_size, + attn, + src_lengths, + max_len, + ) + num_remaining_sent -= len(finalized_sents) + + assert num_remaining_sent >= 0 + if num_remaining_sent == 0: + break + if self.search.stop_on_max_len and step >= max_len: + break + assert step < max_len, f"{step} < {max_len}" + + # Remove finalized sentences (ones for which {beam_size} + # finished hypotheses have been generated) from the batch. + if len(finalized_sents) > 0: + new_bsz = bsz - len(finalized_sents) + + # construct batch_idxs which holds indices of batches to keep for the next pass + batch_mask = torch.ones( + bsz, dtype=torch.bool, device=cand_indices.device + ) + batch_mask[finalized_sents] = False + # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it + batch_idxs = torch.arange( + bsz, device=cand_indices.device + ).masked_select(batch_mask) + + # Choose the subset of the hypothesized constraints that will continue + self.search.prune_sentences(batch_idxs) + + eos_mask = eos_mask[batch_idxs] + cand_beams = cand_beams[batch_idxs] + bbsz_offsets.resize_(new_bsz, 1) + cand_bbsz_idx = cand_beams.add(bbsz_offsets) + cand_scores = cand_scores[batch_idxs] + cand_indices = cand_indices[batch_idxs] + + if prefix_tokens is not None: + prefix_tokens = prefix_tokens[batch_idxs] + src_lengths = src_lengths[batch_idxs] + cands_to_ignore = cands_to_ignore[batch_idxs] + + scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1) + if attn is not None: + attn = attn.view(bsz, -1)[batch_idxs].view( + new_bsz * beam_size, attn.size(1), -1 + ) + bsz = new_bsz + else: + batch_idxs = None + + # Set active_mask so that values > cand_size indicate eos hypos + # and values < cand_size indicate candidate active hypos. + # After, the min values per row are the top candidate active hypos + + # Rewrite the operator since the element wise or is not supported in torchscript. 
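+            # (De Morgan: a | b == ~((~a) & (~b)), applied element-wise below.)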
+ + eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size])) + active_mask = torch.add( + eos_mask.type_as(cand_offsets) * cand_size, + cand_offsets[: eos_mask.size(1)], + ) + + # get the top beam_size active hypotheses, which are just + # the hypos with the smallest values in active_mask. + # {active_hypos} indicates which {beam_size} hypotheses + # from the list of {2 * beam_size} candidates were + # selected. Shapes: (batch size, beam size) + new_cands_to_ignore, active_hypos = torch.topk( + active_mask, k=beam_size, dim=1, largest=False + ) + + # update cands_to_ignore to ignore any finalized hypos. + cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size] + # Make sure there is at least one active item for each sentence in the batch. + assert (~cands_to_ignore).any(dim=1).all() + + # update cands_to_ignore to ignore any finalized hypos + + # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam + # can be selected more than once). + active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos) + active_scores = torch.gather(cand_scores, dim=1, index=active_hypos) + + active_bbsz_idx = active_bbsz_idx.view(-1) + active_scores = active_scores.view(-1) + + # copy tokens and scores for active hypotheses + + # Set the tokens for each beam (can select the same row more than once) + tokens[:, : step + 1] = torch.index_select( + tokens[:, : step + 1], dim=0, index=active_bbsz_idx + ) + # Select the next token for each of them + tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather( + cand_indices, dim=1, index=active_hypos + ) + if step > 0: + scores[:, :step] = torch.index_select( + scores[:, :step], dim=0, index=active_bbsz_idx + ) + scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather( + cand_scores, dim=1, index=active_hypos + ) + + # Update constraints based on which candidates were selected for the next beam + self.search.update_constraints(active_hypos) + + # copy attention for active hypotheses + if attn is not None: + attn[:, :, : step + 2] = torch.index_select( + attn[:, :, : step + 2], dim=0, index=active_bbsz_idx + ) + + # reorder incremental state in decoder + reorder_state = active_bbsz_idx + + # sort by score descending + for sent in range(len(finalized)): + scores = torch.tensor( + [float(elem["score"].item()) for elem in finalized[sent]] + ) + _, sorted_scores_indices = torch.sort(scores, descending=True) + finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices] + finalized[sent] = torch.jit.annotate( + List[Dict[str, Tensor]], finalized[sent] + ) + return finalized + + def _prefix_tokens( + self, step: int, lprobs, scores, tokens, prefix_tokens, beam_size: int + ): + """Handle prefix tokens""" + prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat(1, beam_size).view(-1) + prefix_lprobs = lprobs.gather(-1, prefix_toks.unsqueeze(-1)) + prefix_mask = prefix_toks.ne(self.pad) + lprobs[prefix_mask] = torch.tensor(-math.inf).to(lprobs) + lprobs[prefix_mask] = lprobs[prefix_mask].scatter( + -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_lprobs[prefix_mask] + ) + # if prefix includes eos, then we should make sure tokens and + # scores are the same across all beams + eos_mask = prefix_toks.eq(self.eos) + if eos_mask.any(): + # validate that the first beam matches the prefix + first_beam = tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[ + :, 0, 1 : step + 1 + ] + eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0] + target_prefix = 
prefix_tokens[eos_mask_batch_dim][:, :step] + assert (first_beam == target_prefix).all() + + # copy tokens, scores and lprobs from the first beam to all beams + tokens = self.replicate_first_beam(tokens, eos_mask_batch_dim, beam_size) + scores = self.replicate_first_beam(scores, eos_mask_batch_dim, beam_size) + lprobs = self.replicate_first_beam(lprobs, eos_mask_batch_dim, beam_size) + return lprobs, tokens, scores + + def replicate_first_beam(self, tensor, mask, beam_size: int): + tensor = tensor.view(-1, beam_size, tensor.size(-1)) + tensor[mask] = tensor[mask][:, :1, :] + return tensor.view(-1, tensor.size(-1)) + + def finalize_hypos( + self, + step: int, + bbsz_idx, + eos_scores, + tokens, + scores, + finalized: List[List[Dict[str, Tensor]]], + finished: List[bool], + beam_size: int, + attn: Optional[Tensor], + src_lengths, + max_len: int, + ): + """Finalize hypothesis, store finalized information in `finalized`, and change `finished` accordingly. + A sentence is finalized when {beam_size} finished items have been collected for it. + + Returns number of sentences (not beam items) being finalized. + These will be removed from the batch and not processed further. + Args: + bbsz_idx (Tensor): + """ + assert bbsz_idx.numel() == eos_scores.numel() + + # clone relevant token and attention tensors. + # tokens is (batch * beam, max_len). So the index_select + # gets the newly EOS rows, then selects cols 1..{step + 2} + tokens_clone = tokens.index_select(0, bbsz_idx)[ + :, 1 : step + 2 + ] # skip the first index, which is EOS + + tokens_clone[:, step] = self.eos + attn_clone = ( + attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2] + if attn is not None + else None + ) + + # compute scores per token position + pos_scores = scores.index_select(0, bbsz_idx)[:, : step + 1] + pos_scores[:, step] = eos_scores + # convert from cumulative to per-position scores + pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] + + # normalize sentence-level scores + if self.normalize_scores: + eos_scores /= (step + 1) ** self.len_penalty + + # cum_unfin records which sentences in the batch are finished. + # It helps match indexing between (a) the original sentences + # in the batch and (b) the current, possibly-reduced set of + # sentences. 
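+        # E.g. finished = [True, False, True, False] gives cum_unfin = [1, 2],
+        # so reduced indices 0 and 1 map back to original sentences 1 and 3
+        # via sent = unfin_idx + cum_fin_tensor[unfin_idx].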
+ cum_unfin: List[int] = [] + prev = 0 + for f in finished: + if f: + prev += 1 + else: + cum_unfin.append(prev) + cum_fin_tensor = torch.tensor(cum_unfin, dtype=torch.int).to(bbsz_idx) + + unfin_idx = bbsz_idx // beam_size + sent = unfin_idx + torch.index_select(cum_fin_tensor, 0, unfin_idx) + + # Create a set of "{sent}{unfin_idx}", where + # "unfin_idx" is the index in the current (possibly reduced) + # list of sentences, and "sent" is the index in the original, + # unreduced batch + # For every finished beam item + # sentence index in the current (possibly reduced) batch + seen = (sent << 32) + unfin_idx + unique_seen: List[int] = torch.unique(seen).tolist() + + if self.match_source_len: + condition = step > torch.index_select(src_lengths, 0, unfin_idx) + eos_scores = torch.where(condition, torch.tensor(-math.inf), eos_scores) + sent_list: List[int] = sent.tolist() + for i in range(bbsz_idx.size()[0]): + # An input sentence (among those in a batch) is finished when + # beam_size hypotheses have been collected for it + if len(finalized[sent_list[i]]) < beam_size: + if attn_clone is not None: + # remove padding tokens from attn scores + hypo_attn = attn_clone[i] + else: + hypo_attn = torch.empty(0) + + finalized[sent_list[i]].append( + { + "tokens": tokens_clone[i], + "score": eos_scores[i], + "attention": hypo_attn, # src_len x tgt_len + "alignment": torch.empty(0), + "positional_scores": pos_scores[i], + } + ) + + newly_finished: List[int] = [] + for unique_s in unique_seen: + # check termination conditions for this sentence + unique_sent: int = unique_s >> 32 + unique_unfin_idx: int = unique_s - (unique_sent << 32) + + if not finished[unique_sent] and self.is_finished( + step, unique_unfin_idx, max_len, len(finalized[unique_sent]), beam_size + ): + finished[unique_sent] = True + newly_finished.append(unique_unfin_idx) + + return newly_finished + + def is_finished( + self, + step: int, + unfin_idx: int, + max_len: int, + finalized_sent_len: int, + beam_size: int, + ): + """ + Check whether decoding for a sentence is finished, which + occurs when the list of finalized sentences has reached the + beam size, or when we reach the maximum length. 
+ """ + assert finalized_sent_len <= beam_size + if finalized_sent_len == beam_size or step == max_len: + return True + return False + + +class EnsembleModel(nn.Module): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__() + self.models_size = len(models) + # method '__len__' is not supported in ModuleList for torch script + self.single_model = models[0] + self.models = nn.ModuleList(models) + + self.has_incremental: bool = False + if all( + hasattr(m, "decoder") and isinstance(m.decoder, FairseqIncrementalDecoder) + for m in models + ): + self.has_incremental = True + + def forward(self): + pass + + def has_encoder(self): + return hasattr(self.single_model, "encoder") + + def has_incremental_states(self): + return self.has_incremental + + def max_decoder_positions(self): + return min( + [ + m.max_decoder_positions() + for m in self.models + if hasattr(m, "max_decoder_positions") + ] + + [sys.maxsize] + ) + + @torch.jit.export + def forward_encoder(self, net_input: Dict[str, Tensor]): + if not self.has_encoder(): + return None + return [model.encoder.forward_torchscript(net_input) for model in self.models] + + @torch.jit.export + def forward_decoder( + self, + tokens, + encoder_outs: List[Dict[str, List[Tensor]]], + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + temperature: float = 1.0, + ): + log_probs = [] + avg_attn: Optional[Tensor] = None + encoder_out: Optional[Dict[str, List[Tensor]]] = None + for i, model in enumerate(self.models): + if self.has_encoder(): + encoder_out = encoder_outs[i] + # decode each model + if self.has_incremental_states(): + decoder_out = model.decoder.forward( + tokens, + encoder_out=encoder_out, + incremental_state=incremental_states[i], + modal_idx=-1, + ) + else: + if hasattr(model, "decoder"): + decoder_out = model.decoder.forward(tokens, encoder_out=encoder_out) + else: + decoder_out = model.forward(tokens) + + attn: Optional[Tensor] = None + decoder_len = len(decoder_out) + if decoder_len > 1 and decoder_out[1] is not None: + if isinstance(decoder_out[1], Tensor): + attn = decoder_out[1] + else: + attn_holder = decoder_out[1]["attn"] + if isinstance(attn_holder, Tensor): + attn = attn_holder + elif attn_holder is not None: + attn = attn_holder[0] + if attn is not None: + attn = attn[:, -1, :] + + decoder_out_tuple = ( + decoder_out[0][:, -1:, :].div_(temperature), + None if decoder_len <= 1 else decoder_out[1], + ) + probs = model.get_normalized_probs( + decoder_out_tuple, log_probs=True, sample=None + ) + probs = probs[:, -1, :] + if self.models_size == 1: + return probs, attn + + log_probs.append(probs) + if attn is not None: + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + + avg_probs = torch.logsumexp(torch.stack(log_probs, dim=0), dim=0) - math.log( + self.models_size + ) + + if avg_attn is not None: + avg_attn.div_(self.models_size) + return avg_probs, avg_attn + + @torch.jit.export + def reorder_encoder_out( + self, encoder_outs: Optional[List[Dict[str, List[Tensor]]]], new_order + ): + """ + Reorder encoder output according to *new_order*. 
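+
+        Beam search keeps `batch * beam` hypotheses and drops finished
+        sentences as it goes, so the cached encoder states must be re-indexed
+        to follow the surviving hypotheses.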
+ + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + new_outs: List[Dict[str, List[Tensor]]] = [] + if not self.has_encoder(): + return new_outs + for i, model in enumerate(self.models): + assert encoder_outs is not None + new_outs.append( + model.encoder.reorder_encoder_out(encoder_outs[i], new_order) + ) + return new_outs + + @torch.jit.export + def reorder_incremental_state( + self, + incremental_states: List[Dict[str, Dict[str, Optional[Tensor]]]], + new_order, + ): + if not self.has_incremental_states(): + return + for i, model in enumerate(self.models): + model.decoder.reorder_incremental_state_scripting( + incremental_states[i], new_order + ) + + +class SequenceGeneratorWithAlignment(SequenceGenerator): + def __init__( + self, models, tgt_dict, left_pad_target=False, print_alignment="hard", **kwargs + ): + """Generates translations of a given source sentence. + + Produces alignments following "Jointly Learning to Align and + Translate with Transformer Models" (Garg et al., EMNLP 2019). + + Args: + left_pad_target (bool, optional): Whether or not the + hypothesis should be left padded or not when they are + teacher forced for generating alignments. + """ + super().__init__(EnsembleModelWithAlignment(models), tgt_dict, **kwargs) + self.left_pad_target = left_pad_target + + if print_alignment == "hard": + self.extract_alignment = utils.extract_hard_alignment + elif print_alignment == "soft": + self.extract_alignment = utils.extract_soft_alignment + + @torch.no_grad() + def generate(self, models, sample, **kwargs): + finalized = super()._generate(sample, **kwargs) + + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + beam_size = self.beam_size + ( + src_tokens, + src_lengths, + prev_output_tokens, + tgt_tokens, + ) = self._prepare_batch_for_alignment(sample, finalized) + if any(getattr(m, "full_context_alignment", False) for m in self.model.models): + attn = self.model.forward_align(src_tokens, src_lengths, prev_output_tokens) + else: + attn = [ + finalized[i // beam_size][i % beam_size]["attention"].transpose(1, 0) + for i in range(bsz * beam_size) + ] + + if src_tokens.device != "cpu": + src_tokens = src_tokens.to("cpu") + tgt_tokens = tgt_tokens.to("cpu") + attn = [i.to("cpu") for i in attn] + + # Process the attn matrix to extract hard alignments. 
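+        # For each of the bsz * beam_size hypotheses, turn its cross-attention
+        # matrix into discrete source-target alignments, using self.pad and
+        # self.eos to mask special positions (extract_hard_alignment or
+        # extract_soft_alignment, depending on print_alignment).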
+ for i in range(bsz * beam_size): + alignment = self.extract_alignment( + attn[i], src_tokens[i], tgt_tokens[i], self.pad, self.eos + ) + finalized[i // beam_size][i % beam_size]["alignment"] = alignment + return finalized + + def _prepare_batch_for_alignment(self, sample, hypothesis): + src_tokens = sample["net_input"]["src_tokens"] + bsz = src_tokens.shape[0] + src_tokens = ( + src_tokens[:, None, :] + .expand(-1, self.beam_size, -1) + .contiguous() + .view(bsz * self.beam_size, -1) + ) + src_lengths = sample["net_input"]["src_lengths"] + src_lengths = ( + src_lengths[:, None] + .expand(-1, self.beam_size) + .contiguous() + .view(bsz * self.beam_size) + ) + prev_output_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=True, + ) + tgt_tokens = data_utils.collate_tokens( + [beam["tokens"] for example in hypothesis for beam in example], + self.pad, + self.eos, + self.left_pad_target, + move_eos_to_beginning=False, + ) + return src_tokens, src_lengths, prev_output_tokens, tgt_tokens + + +class EnsembleModelWithAlignment(EnsembleModel): + """A wrapper around an ensemble of models.""" + + def __init__(self, models): + super().__init__(models) + + def forward_align(self, src_tokens, src_lengths, prev_output_tokens): + avg_attn = None + for model in self.models: + decoder_out = model(src_tokens, src_lengths, prev_output_tokens) + attn = decoder_out[1]["attn"][0] + if avg_attn is None: + avg_attn = attn + else: + avg_attn.add_(attn) + if len(self.models) > 1: + avg_attn.div_(len(self.models)) + return avg_attn diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/tasks/iwslt_joint_pretraining.py b/SpeechT5/YiTrans/yitrans_iwslt22/tasks/iwslt_joint_pretraining.py new file mode 100644 index 0000000000000000000000000000000000000000..fbbba14135234e08bef1a35821b12a069dea2c8a --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/tasks/iwslt_joint_pretraining.py @@ -0,0 +1,726 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# -------------------------------------------------------- +""" + Modified from + https://github.com/facebookresearch/fairseq/blob/main/fairseq/tasks/hubert_pretraining.py + https://github.com/facebookresearch/fairseq/blob/main/fairseq/tasks/denoising.py + + Pre-training task for YiTrans@IWSLT2022 + Step1: Combine Speech2C and multilingual BART + Step2: Combine ASR and multilingual MT +""" +import logging +import os +import sys +from typing import Dict, List, Optional, Tuple +from pathlib import Path + +import numpy as np +from argparse import Namespace +from collections import OrderedDict + +from dataclasses import dataclass, field +from fairseq.data import Dictionary, encoders +from fairseq.data import ( + Dictionary, + data_utils, + StripTokenDataset, + PrependTokenDataset, + AppendTokenDataset, + FairseqDataset, + iterators, + ResamplingDataset, +) +from fairseq.data.audio.speech_to_text_joint_dataset import S2TJointDataConfig +from fairseq.data.shorten_dataset import maybe_shorten_dataset +from fairseq.data.encoders.utils import get_whole_word_mask +from fairseq.dataclass.configs import 
FairseqDataclass +from fairseq.tasks import register_task +from fairseq.tasks.fairseq_task import FairseqTask +from fairseq.dataclass.constants import ChoiceEnum + +from fairseq.tasks.hubert_pretraining import HubertPretrainingConfig +from yitrans_iwslt22.data.load_langpair_dataset import load_langpair_dataset +from yitrans_iwslt22.data.lang_pair_mask_dataset import LangPairMaskDataset +from yitrans_iwslt22.data.speech2c_dataset import Speech2cDataset +from yitrans_iwslt22.data.denoising_dataset import DenoisingDatasetLang +from yitrans_iwslt22.data.concat_dataset import ConcatDataset +from yitrans_iwslt22.data.multimodal_corpus_dataset import MultiCorpusDataset + + +logger = logging.getLogger(__name__) +TOKENIZER_CHOICES = ChoiceEnum(["sentencepiece", "hubert_letters", "none"]) + +def _lang_token(lang: str): + return "".format(lang) + +def _lang_token_index(dic: Dictionary, lang: str): + """Return language token index.""" + idx = dic.index(_lang_token(lang)) + assert idx != dic.unk_index, "cannot find language token for lang {}".format(lang) + return idx + +class LabelEncoder(object): + def __init__(self, dictionary: Dictionary) -> None: + self.dictionary = dictionary + + def __call__(self, label: str) -> List[str]: + return self.dictionary.encode_line( + label, append_eos=False, add_if_not_exist=False, + ) + +@dataclass +class TextPretrainingConfig(FairseqDataclass): + """ + Convert the legacy config of BART to the Dataclass style + """ + text_data: Optional[str] = field( + default=None, + metadata={ + "help": "if set, path to text data directory", + }, + ) + seed: Optional[int] = field( + default=1, + metadata={ + "help": "for ordered_indices in MulticorpusDataset", + }, + ) + tokens_per_sample: Optional[int] = field( + default=512, + metadata={ + "help": "max number of total tokens over all segments per sample for dataset", + }, + ) + sample_break_mode: Optional[str] = field( + default="eos", + metadata={ + "help": "mode for breaking sentence", + }, + ) + mask: Optional[float] = field( + default=0.3, + metadata={ + "help": "fraction of words/subwords that will be masked", + }, + ) + leave_unmasked_prob: float = field( + default=0.1, + metadata={"help": "probability that a masked token is unmasked"}, + ) + mask_random: Optional[float] = field( + default=0.0, + metadata={ + "help": "instead of using [MASK], use random token this often", + }, + ) + freq_weighted_replacement: bool = field( + default=False, + metadata={"help": "sample random replacement words based on word frequencies"}, + ) + mask_whole_words: bool = field( + default=False, + metadata={"help": "mask whole words; you may also want to set --bpe"}, + ) + mask_multiple_length: int = field( + default=1, + metadata={"help": "repeat the mask indices multiple times"}, + ) + mask_stdev: float = field( + default=0.0, + metadata={"help": "stdev of the mask length"}, + ) + shorten_method: Optional[str] = field( + default="none", + metadata={ + "help": "if not none, shorten sequences that exceed tokens_per_sample", + "choices": "none/truncate/random_crop" + }, + ) + shorten_data_split_list: Optional[str] = field( + default="", + metadata={ + "help": "comma_separated list of dataset splits to apply shortening to, e.g., train,valid (default: all dataset splits)", + }, + ) + ### below hypra-parameters is used in BART + insert: Optional[float] = field( + default=0.0, + metadata={ + "help": "insert this percentage of additional random tokens", + }, + ) + permute: Optional[float] = field( + default=0.0, + metadata={ + "help": "take this 
proportion of subwords and permute them", + }, + ) + rotate: Optional[float] = field( + default=0.0, + metadata={ + "help": "rotate this proportion of inputs", + }, + ) + poisson_lambda: Optional[float] = field( + default=3, + metadata={ + "help": "randomly shuffle sentences for this proportion of inputs", + }, + ) + permute_sentences: Optional[float] = field( + default=0.0, + metadata={ + "help": "shuffle this proportion of sentences in all inputs", + }, + ) + mask_length: Optional[str] = field( + default="span-poisson", + metadata={ + "help": "mask length to choose", + "choice": "subword/word/span-poisson" + }, + ) + replace_length: Optional[int] = field( + default=1, + metadata={ + "help": "when masking N tokens, replace with 0, 1, or N tokens (use -1 for N)", + }, + ) + shuffle_instance: Optional[bool] = field( + default=False, + metadata={"help": "shuffle instance"}, + ) + max_source_positions: Optional[int] = field( + default=1024, + metadata={"help": "max number of tokens in the source sequence"}, + ) + max_target_positions: Optional[int] = field( + default=1024, + metadata={"help": "max number of tokens in the target sequence"}, + ) + bpe: Optional[str] = field( + default="sentencepiece", + metadata={ + "help": "will wrapped by the text_data_config yaml", + }, + ) + data_config: Optional[str] = field( + default=None, + metadata={ + "help": "a config yaml specify the bpe model of text data", + }, + ) + text_maxtokens_ratio: Optional[float] = field( + default=1.0, + metadata={ + "help": "for text, max_tokens = max_tokens * text_maxtokens_ratio / 320 ", + }, + ) + prepend_tgt_lang_tag: bool = field( + default=True, + metadata={"help": "prepend tgt_lang_tag to replace "}, + ) + mask_text_ratio: Optional[float] = field( + default=0.0, + metadata={ + "help": "mask_text_ratio, for paired data", + }, + ) + + +@dataclass +class JointPretrainingConfig(HubertPretrainingConfig): + store_labels: Optional[bool] = field( + default=False, + metadata={"help": "store spm labels in memory, should be true when fine-tune with bpe"}, + ) + add_decoder: bool = field( + default=False, + metadata={"help": "whether to add decoder for CE Loss on code"}, + ) + split_modality_batch: bool = field( + default=False, + metadata={"help": "whether create all samples of different modalities in a batch"}, + ) + speech_tgt_lang: str = field( + default="", + metadata={"help": "prepend to prev_output_tokens to replace , only used for decoder"}, + ) + speech_sampling_alpha: float = field( + default=0.2, + metadata={ + "help": "Hyper-parameter alpha = 1/T for temperature-based speech resampling." + "(alpha = 1 for no resampling)" + }, + ) + text_sampling_alpha: float = field( + default=0.2, + metadata={ + "help": "Hyper-parameter alpha = 1/T for temperature-based text resampling." 
+ "(alpha = 1 for no resampling)" + }, + ) + hubert_tokenizer: Optional[TOKENIZER_CHOICES] = field( + default="none", + metadata={"help": "which tokenizer for processing text"}, + ) + sp_path: Optional[str] = field( + default=None, + metadata={"help": "sentencepiece model path if using bpe tokenizer"}, + ) + text_cfg: TextPretrainingConfig = TextPretrainingConfig() + + +@register_task("iwslt_joint_pretraining", dataclass=JointPretrainingConfig) +class JointPretrainingTask(FairseqTask): + cfg: JointPretrainingConfig + def __init__( + self, + cfg: JointPretrainingConfig, + ) -> None: + super().__init__(cfg) + + logger.info(f"current directory is {os.getcwd()}") + logger.info(f"JointPretrainingTask Config {cfg}") + + self.cfg = cfg + self.fine_tuning = cfg.fine_tuning + self.blank_symbol = "" + + self.state.add_factory("hubert_tokenizer", self.build_tokenizer) + self.state.add_factory("text_dictionary", self.load_text_dictionary) + self.state.add_factory("text_src_dictionary", self.load_text_src_dictionary) + if cfg.fine_tuning: + self.state.add_factory("target_dictionary", self.load_dictionaries) + else: + self.state.add_factory("dictionaries", self.load_dictionaries) + + if cfg.text_cfg.data_config is not None: + self.text_data_cfg = S2TJointDataConfig(Path(f"{cfg.text_cfg.text_data}/{cfg.text_cfg.data_config}")) + self.cfg.text_cfg.bpe = self.text_data_cfg.bpe_tokenizer["bpe"] + + @property + def source_dictionary(self) -> Optional[Dictionary]: + return None + + @property + def target_dictionary(self) -> Optional[Dictionary]: + return self.state.target_dictionary + + @property + def dictionaries(self) -> List[Dictionary]: + return self.state.dictionaries + + @property + def text_dictionary(self) -> Optional[Dictionary]: + return self.state.text_dictionary + + @property + def text_src_dictionary(self) -> Optional[Dictionary]: + return self.state.text_src_dictionary + + @property + def hubert_tokenizer(self): + return self.state.hubert_tokenizer + + def load_dictionaries(self): + label_dir = self.cfg.data if self.cfg.label_dir is None else self.cfg.label_dir + dictionaries = [Dictionary.load(f"{label_dir}/dict.{label}.txt") for label in self.cfg.labels] + return dictionaries[0] if self.cfg.fine_tuning else dictionaries + + def load_text_dictionary(self): + tgt_dict_path = f"{self.cfg.text_cfg.text_data}/{self.text_data_cfg.vocab_filename}" + if not os.path.isfile(tgt_dict_path): + raise FileNotFoundError(f"Dict not found: {tgt_dict_path}") + text_dictionary = Dictionary.load(tgt_dict_path) + self.mask_idx = text_dictionary.add_symbol("") + return text_dictionary + + def load_text_src_dictionary(self): + return self.load_text_dictionary() + + @classmethod + def setup_task( + cls, cfg: JointPretrainingConfig, **kwargs + ) -> "JointPretrainingTask": + return cls(cfg) + + def get_label_dir(self) -> str: + if self.cfg.label_dir is None: + return self.cfg.data + return self.cfg.label_dir + + def load_dataset(self, split: str, epoch=1, **kwargs) -> None: + """ + Create Wav dataset for audio, and Index dataset for phonemized text, + then concatenate them to by fairseq.data.multi_corpus_dataset.MultiCorpusDataset. 
+ """ + if len(split.split("+")) == 1: + speech_splits = split.split(",") + has_text = False + else: + has_text = True + speech_splits, text_splits = split.split("+") + speech_splits = speech_splits.split(",") + speech_splits = [item for item in speech_splits if item != ''] + text_splits = text_splits.split(",") + text_splits = [item for item in text_splits if item != ''] + logging.info(f"text_splits: {text_splits}") + logging.info(f"speech_splits: {speech_splits}") + + ### 1, create a speech dataset using Speech2cDataset (modified from HubertDataset) + dicts = [self.target_dictionary] if self.cfg.fine_tuning else self.dictionaries + pad_list = [dict.pad() for dict in dicts] + eos_list = [dict.eos() for dict in dicts] + procs = [LabelEncoder(dict) for dict in dicts] + if self.cfg.speech_tgt_lang != "": + tgt_lang_idx = _lang_token_index(dicts[0], self.cfg.speech_tgt_lang) + logger.info(f"Will prepend <{tgt_lang_idx}> at the beginning of prev_output_tokens to replace ") + else: + tgt_lang_idx = None + + speech_dataset = None + mono_dataset = None + paired_dataset = None + + speech_datasets = [] + for speech_split in speech_splits: + # hubert v1: pad_audio=True, random_crop=False; + paths = [f"{self.get_label_dir()}/{speech_split}.{l}" for l in self.cfg.labels] + speech_datasets.append( + Speech2cDataset( + f"{self.cfg.data}/{speech_split}.tsv", + sample_rate=self.cfg.sample_rate, + label_paths=paths, + label_rates=self.cfg.label_rate, + pad_list=pad_list, + eos_list=eos_list, + label_processors=procs, + max_keep_sample_size=self.cfg.max_keep_size, + min_keep_sample_size=self.cfg.min_sample_size, + max_sample_size=self.cfg.max_sample_size, + pad_audio=self.cfg.pad_audio, + normalize=self.cfg.normalize, + store_labels=self.cfg.store_labels, + random_crop=self.cfg.random_crop, + single_target=self.cfg.single_target, + tgt_dict=dicts[0], + add_decoder=self.cfg.add_decoder, + fine_tuning=self.cfg.fine_tuning, + tgt_lang_idx=tgt_lang_idx, + tokenizer=self.hubert_tokenizer, + ) + ) + + if len(speech_datasets) > 1: + if 'train' in speech_splits[0] and self.cfg.speech_sampling_alpha != 1.0: + size_ratios = self._get_size_ratios( + speech_splits, [len(s) for s in speech_datasets], alpha=self.cfg.speech_sampling_alpha + ) + speech_datasets = [ + ResamplingDataset( + d, size_ratio=r, seed=0, epoch=epoch, replace=(r >= 1.0) + ) + for d, r in zip(speech_datasets, size_ratios) + ] + speech_dataset = ConcatDataset(speech_datasets) + elif len(speech_datasets) == 1: + speech_dataset = speech_datasets[0] + + ### 2, create text mono/paired datasets + logger.info(f"split {split} has unpaired text? {has_text}") + if not has_text: + assert speech_dataset is not None + self.datasets[split] = speech_dataset + return + + text_pairs = [ item for item in text_splits if len(item.split(".")[-1].split("-")) > 1 ] + text_monos = [ item for item in text_splits if len(item.split(".")[-1].split("-")) == 1 ] + logging.info(f"text_monos: {text_monos}") + logging.info(f"text_pairs: {text_pairs}") + + ### 2.1, create text mono dataset using DenoisingDatasetLang + mono_datasets = [] + if len(text_monos) > 0: + for text_split in text_monos: + lang = text_split.split('.')[-2] ## e.g. 
mono_deduped_filt_sort.de_DE.de_DE + mask_whole_words = ( + get_whole_word_mask(Namespace(**self.text_data_cfg.bpe_tokenizer), self.text_dictionary) + if self.cfg.text_cfg.mask_whole_words and lang in ("en_XX", "de_DE") + else None + ) + + mono_dataset = data_utils.load_indexed_dataset( + f"{self.cfg.text_cfg.text_data}/{text_split}", + self.text_dictionary, + combine=True, + ) + mono_dataset = StripTokenDataset(mono_dataset, self.text_dictionary.eos()) + mono_dataset = maybe_shorten_dataset( + mono_dataset, + "xxxxx", + self.cfg.text_cfg.shorten_data_split_list, + self.cfg.text_cfg.shorten_method, + self.cfg.text_cfg.tokens_per_sample - 2, + self.cfg.text_cfg.seed, + ) + logger.info("loaded {} samples from: {}".format(len(mono_dataset), text_split)) + ### prepend bos and eos to dataset + mono_dataset = PrependTokenDataset(mono_dataset, self.text_dictionary.bos()) + mono_dataset = AppendTokenDataset(mono_dataset, self.text_dictionary.eos()) + mono_dataset = DenoisingDatasetLang( + mono_dataset, + mono_dataset.sizes, + self.text_dictionary, + self.mask_idx, + mask_whole_words, + shuffle=self.cfg.text_cfg.shuffle_instance, + seed=self.cfg.text_cfg.seed, + args=self.cfg.text_cfg, + tgt_lang_idx=_lang_token_index(self.text_dictionary, lang) if self.cfg.text_cfg.prepend_tgt_lang_tag else None, + ) + mono_datasets.append(mono_dataset) + + ### 2.2, create paired text datasets using load_langpair_dataset + paired_datasets = [] + if len(text_pairs) > 0: + for text_pair in text_pairs: + text_split, lp = text_pair.rsplit('.', 1) ## e.g. "mt8corpus.de_DE-en_EN" + src, tgt = lp.split("-") + paired_dataset = load_langpair_dataset( + self.cfg.text_cfg.text_data, + text_split, + src, + self.text_src_dictionary, + tgt, + self.text_dictionary, + combine=True, + dataset_impl=None, + upsample_primary=1, + left_pad_source=False, + left_pad_target=False, + max_source_positions=self.cfg.text_cfg.tokens_per_sample, + max_target_positions=self.cfg.text_cfg.tokens_per_sample, + prepend_bos=False, + load_alignments=False, + append_source_id=True if self.cfg.text_cfg.prepend_tgt_lang_tag else False, + lang_format="" if self.cfg.text_cfg.prepend_tgt_lang_tag else "[{}]", + ) + if self.cfg.text_cfg.mask_text_ratio > 0: + # add mask + noise_token_id = self.text_src_dictionary.index("") + paired_dataset = LangPairMaskDataset( + paired_dataset, + src_bos=self.text_src_dictionary.bos(), + src_eos=self.text_src_dictionary.eos(), + noise_id=noise_token_id, + mask_ratio=self.cfg.text_cfg.mask_text_ratio, + ) + paired_datasets.append(paired_dataset) + + + ### 3rd, compose a MultiCorpusDataset + dataset_dict, max_positions_dict, distributions, max_tokens_ratios = self.resample_multi_modality_dataset( + speech_dataset, mono_datasets, paired_datasets, text_monos, text_pairs, epoch=epoch, + ) + self.datasets[split] = MultiCorpusDataset( + dataset_dict, + max_positions=max_positions_dict, + distribution=distributions, + max_tokens_ratio=max_tokens_ratios, + seed=self.cfg.text_cfg.seed, + sort_indices=True, + check_length=False, + ) + + def max_positions(self) -> Tuple[int, int]: + return (sys.maxsize, sys.maxsize) + + def filter_indices_by_size( + self, indices: np.array, *args, **kwargs + ) -> np.array: + return indices + + def get_batch_iterator( + self, + dataset, + max_tokens=None, + max_sentences=None, + max_positions=None, + ignore_invalid_inputs=False, + required_batch_size_multiple=1, + seed=1, + num_shards=1, + shard_id=0, + num_workers=0, + epoch=1, + data_buffer_size=0, + disable_iterator_cache=False, + 
skip_remainder_batch=False, + grouped_shuffling=False, + update_epoch_batch_itr=False, + ): + """ + A wrapper of fairseq.tasks.FairseqTask.get_batch_iterator, used only for pre-training; see + + https://github.com/facebookresearch/fairseq/blob/main/fairseq/tasks/fairseq_task.py + + Returns: + ~fairseq.iterators.EpochBatchIterator: a batched iterator over the + given dataset split + """ + if not isinstance(dataset, MultiCorpusDataset): + return super().get_batch_iterator( + dataset, + max_tokens=max_tokens, + max_sentences=max_sentences, + max_positions=max_positions, + ignore_invalid_inputs=ignore_invalid_inputs, + required_batch_size_multiple=required_batch_size_multiple, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + data_buffer_size=data_buffer_size, + disable_iterator_cache=disable_iterator_cache, + skip_remainder_batch=skip_remainder_batch, + grouped_shuffling=grouped_shuffling, + update_epoch_batch_itr=update_epoch_batch_itr, + ) + + can_reuse_epoch_itr = ( + not disable_iterator_cache + and not update_epoch_batch_itr + and self.can_reuse_epoch_itr(dataset) + ) + if can_reuse_epoch_itr and dataset in self.dataset_to_epoch_iter: + logger.debug("reusing EpochBatchIterator for epoch {}".format(epoch)) + return self.dataset_to_epoch_iter[dataset] + + assert isinstance(dataset, FairseqDataset) + + # initialize the dataset with the correct starting epoch + dataset.set_epoch(epoch) + + # get indices ordered by example size + with data_utils.numpy_seed(seed): + indices = dataset.ordered_indices() + + # create mini-batches with given size constraints + batch_sampler = dataset.get_batch_sampler( + indices, + num_shards, + seed, + max_tokens=max_tokens, + max_sentences=max_sentences, + required_batch_size_multiple=required_batch_size_multiple, + split_modality_batch=self.cfg.split_modality_batch, + ) + + # return a reusable, sharded iterator + epoch_iter = iterators.EpochBatchIterator( + dataset=dataset, + collate_fn=dataset.collater, + batch_sampler=batch_sampler, + seed=seed, + num_shards=num_shards, + shard_id=shard_id, + num_workers=num_workers, + epoch=epoch, + buffer_size=data_buffer_size, + skip_remainder_batch=skip_remainder_batch, + disable_shuffling=True, + grouped_shuffling=grouped_shuffling, + ) + + if can_reuse_epoch_itr: + self.dataset_to_epoch_iter[dataset] = epoch_iter + + return epoch_iter + + @classmethod + def _get_size_ratios(cls, ids: List[str], sizes: List[int], alpha: float = 1.0): + """Size ratios for temperature-based sampling + (https://arxiv.org/abs/1907.05019)""" + _sizes = np.array(sizes) + prob = _sizes / _sizes.sum() + smoothed_prob = prob ** alpha + smoothed_prob = smoothed_prob / smoothed_prob.sum() + size_ratio = (smoothed_prob * _sizes.sum()) / _sizes + + o_str = str({_i: f"{prob[i]:.3f}" for i, _i in enumerate(ids)}) + logger.info(f"original sampling probability: {o_str}") + p_str = str({_i: f"{smoothed_prob[i]:.3f}" for i, _i in enumerate(ids)}) + logger.info(f"balanced sampling probability: {p_str}") + sr_str = str({_id: f"{size_ratio[i]:.3f}" for i, _id in enumerate(ids)}) + logger.info(f"balanced sampling size ratio: {sr_str}") + return size_ratio.tolist() + + def resample_multi_modality_dataset(self, speech_dataset, mono_datasets, paired_datasets, mono_splits, paired_splits, epoch=1, train=True): + assert len(mono_datasets+paired_datasets) > 0, "No text data loaded!"
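`_get_size_ratios` above implements temperature-based corpus resampling (https://arxiv.org/abs/1907.05019): the empirical distribution p_i = n_i / sum(n) is smoothed to p_i^alpha and renormalized, and each corpus is then up- or down-sampled by smoothed_p_i * sum(n) / n_i. The standalone sketch below reproduces just that arithmetic on made-up corpus sizes so the effect of alpha is easy to see; the sizes and alpha values are illustrative only.

```python
# Hedged sketch: the size-ratio arithmetic behind temperature-based sampling
# (alpha = 1/T). Corpus sizes and alpha values below are made up.
import numpy as np


def size_ratios(sizes, alpha):
    sizes = np.asarray(sizes, dtype=np.float64)
    prob = sizes / sizes.sum()               # empirical distribution
    smoothed = prob ** alpha
    smoothed = smoothed / smoothed.sum()     # renormalize
    return (smoothed * sizes.sum()) / sizes  # per-corpus up/down-sampling factor


if __name__ == "__main__":
    sizes = [1_000_000, 100_000, 10_000]     # e.g. high-, mid-, low-resource corpora
    for alpha in (1.0, 0.5, 0.2):
        print(alpha, np.round(size_ratios(sizes, alpha), 3))
    # alpha = 1.0 leaves every ratio at 1.0 (no resampling); smaller alpha
    # up-samples the small corpora and down-samples the large one.
```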
+ + text_datasets = mono_datasets+paired_datasets + if len(text_datasets) > 1 and self.cfg.text_sampling_alpha != 1.0: + size_ratios = self._get_size_ratios( + mono_splits + paired_splits, [len(s) for s in mono_datasets + paired_datasets], alpha=self.cfg.text_sampling_alpha + ) + text_datasets = [ + ResamplingDataset( + d, size_ratio=r, seed=0, epoch=epoch, replace=(r >= 1.0) + ) + for d, r in zip(text_datasets, size_ratios) + ] + + mono_datasets = text_datasets[:len(mono_datasets)] + paired_datasets = text_datasets[len(mono_datasets):] + dataset_list = [speech_dataset] + for datasets in [mono_datasets, paired_datasets]: + if len(datasets) > 0: + dataset_list.append(ConcatDataset(datasets)) + else: + dataset_list.append(None) + + ### match speech/text datasets according to modality + dataset_dict = OrderedDict((name, d) for name, d in zip(["speech", "text_mono", "text_paired"], dataset_list) if d is not None) + max_positions_dict = OrderedDict((name, None) for name in dataset_dict.keys()) + if "text_paired" in dataset_dict: + max_positions_dict["text_paired"] = (self.cfg.text_cfg.tokens_per_sample, self.cfg.text_cfg.tokens_per_sample) + dataset_lens = np.array([len(dataset) for dataset in dataset_dict.values()]) + dataset_avg_sample_lens = np.array([ + sum([dataset.num_tokens(i) for i in np.random.randint(low=0, high=len(dataset), size=10000)]) / 10000.0 + for dataset in dataset_dict.values() + ]) + max_tokens_ratios = [1.0 / 320 / self.cfg.text_cfg.text_maxtokens_ratio] * len(dataset_dict) + + if not "speech" in dataset_dict: + distributions = [l / sum(dataset_lens) for l in dataset_lens] + else: + ## we just keep the batches of speech and non-speech the same + first_ratio = dataset_lens[0] / sum(dataset_lens) + distributions = [max_tokens_ratios[0] * dataset_avg_sample_lens[0] / l for l in dataset_avg_sample_lens] + text_total = sum(dataset_lens[1:]) + distributions = [1.2 * d * n / text_total for d, n in zip(distributions, dataset_lens)] + max_tokens_ratios[0] = 1.0 + distributions[0] = 1.0 + distributions = [first_ratio * d for d in distributions] + + logging.info(f"Number samples of datasets is {dataset_lens}") + logging.info(f"Avg sample length of datasets is {dataset_avg_sample_lens}") + logging.info(f"Sampling distributions is {distributions}") + logging.info(f"Maxtokens ratio is {max_tokens_ratios}") + return dataset_dict, max_positions_dict, distributions, max_tokens_ratios + + def build_tokenizer(self, cfg=None): + logger.info(f"tokenizer: {self.cfg.hubert_tokenizer}") + if self.cfg.hubert_tokenizer != "none": + return encoders.build_bpe(Namespace(**{"bpe": self.cfg.hubert_tokenizer, "sentencepiece_model": self.cfg.sp_path})) + else: + return None diff --git a/SpeechT5/YiTrans/yitrans_iwslt22/tasks/iwslt_translation_from_pretrain.py b/SpeechT5/YiTrans/yitrans_iwslt22/tasks/iwslt_translation_from_pretrain.py new file mode 100644 index 0000000000000000000000000000000000000000..72e0d95be17411ab8877be83c90fd4d7ba6a1091 --- /dev/null +++ b/SpeechT5/YiTrans/yitrans_iwslt22/tasks/iwslt_translation_from_pretrain.py @@ -0,0 +1,252 @@ +# -------------------------------------------------------- +# The YiTrans End-to-End Speech Translation System for IWSLT 2022 Offline Shared Task (https://arxiv.org/abs/2206.05777) +# Github source: https://github.com/microsoft/SpeechT5/tree/main/YiTrans +# Copyright (c) 2022 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Based on fairseq code bases +# https://github.com/facebookresearch/fairseq +# 
-------------------------------------------------------- +""" + Modified from + https://github.com/facebookresearch/fairseq/blob/main/fairseq/tasks/translation.py + +""" + +import torch +import logging +from dataclasses import dataclass, field +from typing import List, Optional, NamedTuple + +from fairseq import utils +from fairseq.data import LanguagePairDataset, TransformEosLangPairDataset, FairseqDataset +from fairseq.tasks import register_task +from fairseq.tasks.translation import TranslationTask, TranslationConfig + +from yitrans_iwslt22.data.concat_dataset import ConcatDataset +from yitrans_iwslt22.data.load_langpair_dataset import load_langpair_dataset + +logger = logging.getLogger(__name__) + + + +class LangPairStripDataset(FairseqDataset): + def __init__( + self, + dataset: LanguagePairDataset, + src_eos: int, + src_bos: Optional[int] = None, + noise_id: Optional[int] = -1, + mask_ratio: Optional[float] = 0, + mask_type: Optional[str] = "random", + ): + self.dataset = dataset + self.src_eos = src_eos + self.src_bos = src_bos + self.noise_id = noise_id + self.mask_ratio = mask_ratio + self.mask_type = mask_type + assert mask_type in ("random", "tail") + + @property + def src_sizes(self): + return self.dataset.src_sizes + + @property + def tgt_sizes(self): + return self.dataset.tgt_sizes + + @property + def sizes(self): + # dataset.sizes can be a dynamically computed sizes: + return self.dataset.sizes + + def get_batch_shapes(self): + return self.dataset.buckets + + def num_tokens_vec(self, indices): + return self.dataset.num_tokens_vec(indices) + + def __len__(self): + return len(self.dataset) + + def num_tokens(self, index): + return self.dataset.num_tokens(index) + + def size(self, index): + return self.dataset.size(index) + + def ordered_indices(self): + return self.dataset.ordered_indices() + + @property + def supports_prefetch(self): + return getattr(self.dataset, "supports_prefetch", False) + + def prefetch(self, indices): + return self.dataset.prefetch(indices) + + def mask_src_tokens(self, sample): + src_item = sample["source"] + mask = None + if self.mask_type == "random": + mask = torch.rand(len(src_item)).le(self.mask_ratio) + else: + mask = torch.ones(len(src_item)) + mask[: int(len(src_item) * (1 - self.mask_ratio))] = 0 + mask = mask.eq(1) + mask[-1] = False + if src_item[0] == self.src_bos: + mask[0] = False + if src_item[-2] == self.src_eos: + mask[-2] = False + no_mask = ~mask + mask_src_item = src_item[no_mask] + smp = sample + smp["source"] = mask_src_item + print(f"{len(src_item)}: {src_item}") + print(f"{len(mask_src_item)}: {mask_src_item}") + return smp + + def __getitem__(self, index): + sample = self.dataset[index] + if self.mask_ratio > 0: + sample = self.mask_src_tokens(sample) + return sample + + def collater(self, samples, pad_to_length=None): + return self.dataset.collater(samples, pad_to_length=pad_to_length) + + +@dataclass +class AddTranslationConfig(TranslationConfig): + langs: str = "" + prepend_bos: bool = False + normalize: bool = False + append_source_id: bool = False + mask_text_ratio: float = 0 + ### ShrinkingDataset related, not used + shrink_start_epoch: int = 0 + shrink_end_epoch: int = 0 + shrink_start_ratio: float = 1.0 + shrink_end_ratio: float = 1.0 + + +@register_task("iwslt_translation_from_pretrained", dataclass=AddTranslationConfig) +class TranslationFromPretrainedTask(TranslationTask): + args: AddTranslationConfig + + def __init__(self, args: AddTranslationConfig, src_dict, tgt_dict): + super().__init__(args, src_dict, tgt_dict) 
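`LangPairStripDataset.mask_src_tokens` above drops a fraction of source tokens either at random positions or from the tail, while protecting the edge tokens. The sketch below isolates that masking logic on a plain tensor so the two modes can be compared; the helper name and the hard-coded bos/eos ids are assumptions for illustration, not the dataset wrapper itself.

```python
# Hedged sketch: random vs. tail dropping of source tokens, mirroring the
# idea of LangPairStripDataset.mask_src_tokens (illustrative only).
import torch


def drop_src_tokens(src_item: torch.Tensor, mask_ratio: float,
                    mask_type: str = "random", bos: int = 0, eos: int = 2) -> torch.Tensor:
    if mask_type == "random":
        mask = torch.rand(len(src_item)).le(mask_ratio)   # drop each token w.p. mask_ratio
    else:  # "tail": keep the first (1 - ratio) tokens, drop the rest
        mask = torch.ones(len(src_item))
        mask[: int(len(src_item) * (1 - mask_ratio))] = 0
        mask = mask.eq(1)
    # never drop the final token, nor the bos/eos guards at the edges
    mask[-1] = False
    if src_item[0] == bos:
        mask[0] = False
    if len(src_item) > 1 and src_item[-2] == eos:
        mask[-2] = False
    return src_item[~mask]


if __name__ == "__main__":
    torch.manual_seed(0)
    src = torch.tensor([0, 11, 12, 13, 14, 15, 2, 5])  # bos ... eos lang-tag
    print(drop_src_tokens(src, 0.3, "random"))
    print(drop_src_tokens(src, 0.3, "tail"))
```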
+ self.args = args + self.langs = args.langs.split(",") + for d in [src_dict, tgt_dict]: + for l in self.langs: + d.add_symbol("[{}]".format(l)) + d.add_symbol("<mask>") + + + def load_dataset(self, split, epoch=1, combine=False, **kwargs): + """Load a given dataset split. + + Args: + split (str): name of the split (e.g., train, valid, test) + """ + paths = utils.split_paths(self.args.data) + assert len(paths) > 0 + data_path = paths[(epoch - 1) % len(paths)] + + # infer langcode + src, tgt = self.args.source_lang, self.args.target_lang + + paired_datasets = [] + for sub_split in split.split(","): + paired_dataset = load_langpair_dataset( + data_path, + sub_split, + src, + self.src_dict, + tgt, + self.tgt_dict, + combine=combine, + dataset_impl=self.args.dataset_impl, + upsample_primary=self.args.upsample_primary, + left_pad_source=self.args.left_pad_source, + left_pad_target=self.args.left_pad_target, + max_source_positions=getattr(self.args, "max_source_positions", 1024), + max_target_positions=getattr(self.args, "max_target_positions", 1024), + load_alignments=self.args.load_alignments, + prepend_bos=getattr(self.args, "prepend_bos", False), + append_source_id=getattr(self.args, "append_source_id", False), + ) + if not split.startswith("valid") and getattr(self.args, "mask_text_ratio", 0) > 0 and not sub_split.startswith("asr_"): + mask_text_ratio = getattr(self.args, "mask_text_ratio", 0) + noise_token_id = self.src_dict.index("<mask>") + logger.info(f"Masking {sub_split} at a probability: {mask_text_ratio}") + paired_dataset = LangPairStripDataset( + paired_dataset, + src_bos=self.src_dict.bos(), + src_eos=self.src_dict.eos(), + noise_id=noise_token_id, + mask_ratio=mask_text_ratio, + ) + paired_datasets.append(paired_dataset) + paired_dataset = paired_datasets[0] if len(paired_datasets) == 1 else ConcatDataset(paired_datasets, 1) + + if getattr(self.args, "append_source_id", False): + logger.info("Appending the language id token to the end of samples") + self.datasets[split] = paired_dataset + else: + logger.info("Replacing eos with the language id token for prev_output_tokens") + self.datasets[split] = TransformEosLangPairDataset( + paired_dataset, + src_eos=self.src_dict.eos(), + tgt_bos=self.tgt_dict.eos(), # 'prev_output_tokens' starts with eos + new_tgt_bos=self.tgt_dict.index("[{}]".format(tgt)), + ) + + def build_generator(self, models, args, **unused): + if getattr(args, "score_reference", False): + from fairseq.sequence_scorer import SequenceScorer + + return SequenceScorer( + self.target_dictionary, + eos=self.tgt_dict.index("[{}]".format(self.args.target_lang)), + ) + else: + from yitrans_iwslt22.sequence_generator import SequenceGenerator + + return SequenceGenerator( + models, + self.target_dictionary, + beam_size=getattr(args, "beam", 5), + max_len_a=getattr(args, "max_len_a", 0), + max_len_b=getattr(args, "max_len_b", 200), + min_len=getattr(args, "min_len", 1), + normalize_scores=(not getattr(args, "unnormalized", False)), + len_penalty=getattr(args, "lenpen", 1), + unk_penalty=getattr(args, "unkpen", 0), + temperature=getattr(args, "temperature", 1.0), + match_source_len=getattr(args, "match_source_len", False), + no_repeat_ngram_size=getattr(args, "no_repeat_ngram_size", 0), + eos=self.tgt_dict.index("[{}]".format(self.args.target_lang)) if getattr(self.args, "append_source_id", False) else None, + bos=None if getattr(self.args, "append_source_id", False) else self.tgt_dict.index("[{}]".format(self.args.target_lang)) + ) + + def build_dataset_for_inference(self, src_tokens, src_lengths, constraints=None): + if
getattr(self.args, "append_source_id", False): + src_lang_id = self.source_dictionary.index("[{}]".format(self.args.source_lang)) + source_tokens = [] + for s_t in src_tokens: + s_t = torch.cat([s_t, s_t.new(1).fill_(src_lang_id)]) + source_tokens.append(s_t) + else: + source_tokens = src_tokens + + dataset = LanguagePairDataset( + source_tokens, + src_lengths, + self.source_dictionary, + tgt_dict=self.target_dictionary, + constraints=constraints, + ) + return dataset
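When `append_source_id` is enabled, `build_dataset_for_inference` appends the source-language tag (e.g. `[de_DE]`) to every input sentence before wrapping the batch in a `LanguagePairDataset`, mirroring the mBART-style convention used during training. The sketch below shows only that tagging step on dummy tensors; the token ids are made up for illustration.

```python
# Hedged sketch: appending a language-id token to each source sentence, as
# done in build_dataset_for_inference when append_source_id is set.
# Token ids below are dummy values chosen for illustration.
from typing import List

import torch


def append_lang_id(src_tokens: List[torch.Tensor], lang_id: int) -> List[torch.Tensor]:
    """Return a copy of each source sequence with the language tag appended."""
    return [torch.cat([s_t, s_t.new(1).fill_(lang_id)]) for s_t in src_tokens]


if __name__ == "__main__":
    de_lang_id = 250_003                      # hypothetical index of "[de_DE]"
    batch = [torch.tensor([132, 87, 5, 2]),   # "... <eos>"
             torch.tensor([9, 41, 2])]
    for s in append_lang_id(batch, de_lang_id):
        print(s)
```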