Update README.md
Browse files
README.md
CHANGED
@@ -255,6 +255,13 @@ This version of ZeroSwot is trained with ASR data from CommonVoice, and adapting
|
|
255 |
|
256 |
## Usage
|
257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
```python
|
259 |
from transformers import Wav2Vec2Processor, NllbTokenizer, AutoModel, AutoModelForSeq2SeqLM
|
260 |
import torchaudio
|
@@ -271,21 +278,21 @@ processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60
|
|
271 |
tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
|
272 |
|
273 |
# Load ZeroSwot Encoder
|
274 |
-
commit_hash = "
|
275 |
zeroswot_encoder = AutoModel.from_pretrained(
|
276 |
"johntsi/ZeroSwot-Medium_asr-cv_en-to-200", trust_remote_code=True, revision=commit_hash,
|
277 |
)
|
278 |
-
|
279 |
-
|
280 |
|
281 |
# Load NLLB Model
|
282 |
nllb_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
|
283 |
nllb_model.eval()
|
284 |
nllb_model.to("cuda")
|
285 |
|
286 |
-
# Load
|
287 |
-
audio = load_and_resample_audio(
|
288 |
-
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").cuda
|
289 |
|
290 |
# translation to German
|
291 |
compressed_embeds, attention_mask = zeroswot_encoder(**input_values)
|
|
|
255 |
|
256 |
## Usage
|
257 |
|
258 |
+
The usage is tested with Python 3.9.16 and Transformers v4.41.2. Also install torchaudio and sentencepiece for processing.
|
259 |
+
|
260 |
+
```bash
|
261 |
+
pip install transformers torchaudio sentencepiece
|
262 |
+
```
|
263 |
+
|
264 |
+
|
265 |
```python
|
266 |
from transformers import Wav2Vec2Processor, NllbTokenizer, AutoModel, AutoModelForSeq2SeqLM
|
267 |
import torchaudio
|
|
|
278 |
tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
|
279 |
|
280 |
# Load ZeroSwot Encoder
|
281 |
+
commit_hash = "eafabee295ea1c8b45483d1fd26bd747d9a7d937"
|
282 |
zeroswot_encoder = AutoModel.from_pretrained(
|
283 |
"johntsi/ZeroSwot-Medium_asr-cv_en-to-200", trust_remote_code=True, revision=commit_hash,
|
284 |
)
|
285 |
+
zeroswot_encoder.eval()
|
286 |
+
zeroswot_encoder.to("cuda")
|
287 |
|
288 |
# Load NLLB Model
|
289 |
nllb_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
|
290 |
nllb_model.eval()
|
291 |
nllb_model.to("cuda")
|
292 |
|
293 |
+
# Load audio file
|
294 |
+
audio = load_and_resample_audio(path_to_audio_file)
|
295 |
+
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").to("cuda")
|
296 |
|
297 |
# translation to German
|
298 |
compressed_embeds, attention_mask = zeroswot_encoder(**input_values)
|