Victorlopo21 committed on
Commit
6fa0583
1 Parent(s): 32c6ac4

Upload whisper_youtube.py

Browse files
Files changed (1) hide show
  1. whisper_youtube.py +75 -0
whisper_youtube.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """whisper_youtube.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1spmA-7Un5TA6ahuCeO62BUS_ME6zPuUx
8
+
9
+ # Using gradio for making a nice UI.
10
+ Youtube link version.
11
+
12
+ Installing requirements.
13
+ """
14
+
15
# Notebook-only setup: the leading "!" is IPython/Colab shell syntax and is a
# SyntaxError in a plain .py module, so the commands are kept as comments.
# Install the dependencies before running this script (for example by listing
# them in a requirements file):
#
#   pip install gradio
#   pip install git+https://github.com/huggingface/transformers
#   pip install pytube
19
+
20
+ from pytube import YouTube
21
+ from transformers import pipeline
22
+ import gradio as gr
23
+ import os
24
+
25
+ from transformers import WhisperProcessor
26
+
27
# Whisper-small processor configured for Galician transcription.
# NOTE(review): not referenced again in the visible part of this file;
# confirm it is still needed before keeping the extra download.
processor = WhisperProcessor.from_pretrained(
    "openai/whisper-small",
    language="Galician",
    task="transcribe",
)
28
+
29
+
30
+ from transformers import WhisperTokenizer
31
+
32
# Whisper-small tokenizer configured for Galician transcription.
# NOTE(review): not referenced again in the visible part of this file;
# confirm it is still needed before keeping the extra download.
tokenizer = WhisperTokenizer.from_pretrained(
    "openai/whisper-small",
    language="Galician",
    task="transcribe",
)
33
+
34
+ """## Building a Demo
35
+
36
+ Now that we've fine-tuned our model we can build a demo to show
37
+ off its ASR capabilities! We'll make use of 🤗 Transformers
38
+ `pipeline`, which will take care of the entire ASR pipeline,
39
+ right from pre-processing the audio inputs to decoding the
40
+ model predictions.
41
+
42
+ Running the example below will generate a Gradio demo where we can input audio to
43
+ our fine-tuned Whisper model to transcribe the corresponding text:
44
+ """
45
+
46
# Pipeline object called by transcribe_url to turn an audio file into text.
# Change the model id to "your-username/the-name-you-picked" to use your own.
pipe = pipeline(
    model="Victorlopo21/whisper-medium-gl-30",
)
47
+
48
def get_audio(url):
    """Download the audio track of a YouTube video and return its local path.

    Args:
        url: Address of the YouTube video, passed unchanged to ``YouTube``.

    Returns:
        Path of the downloaded file in the current working directory, with
        its extension replaced by ``.wav``.

    Raises:
        IndexError: If the video exposes no audio-only stream.
    """
    audio_streams = YouTube(url).streams.filter(only_audio=True)
    if len(audio_streams) == 0:
        raise IndexError(f"no audio-only stream available for {url!r}")

    # Keep the original preference for the second audio-only stream, but fall
    # back to the first one instead of failing when only one is offered.
    stream = audio_streams[1] if len(audio_streams) > 1 else audio_streams[0]
    downloaded_path = stream.download(output_path=".")

    # Only the extension is changed; the audio data itself is not converted.
    wav_path = os.path.splitext(downloaded_path)[0] + ".wav"

    # os.replace overwrites an existing target on every platform, so asking
    # for the same video twice does not fail on Windows as os.rename would.
    os.replace(downloaded_path, wav_path)
    return wav_path
57
+
58
def transcribe_url(url):
    """Return the transcription of the audio behind a YouTube ``url``.

    The audio file is fetched with ``get_audio`` and handed to the
    module-level ``pipe``; the ``'text'`` entry of its result is returned.
    """
    audio_path = get_audio(url)
    result = pipe(audio_path)
    return result['text']
61
+
62
# Gradio front end: a text input (the YouTube link given to transcribe_url)
# and a text output (the string that transcribe_url returns).
iface = gr.Interface(
    title="Whisper Medium Galician",
    description="Realtime demo for Galician speech recognition using a fine-tuned Whisper medium model.",
    fn=transcribe_url,
    inputs="text",
    outputs="text",
)

# Start the demo as soon as the script runs.
iface.launch(debug=True)
71
+
72
+ # Short YouTube video to test the demo with
73
+ # https://www.youtube.com/watch?v=Z2SjeZJZi6s&ab_channel=rimc7
74
+
75
+ # TO TRY