Spaces:
Build error
Build error
Victorlopo21
committed on
Commit
•
6fa0583
1
Parent(s):
32c6ac4
Upload whisper_youtube.py
Browse files- whisper_youtube.py +75 -0
whisper_youtube.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""whisper_youtube.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colaboratory.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1spmA-7Un5TA6ahuCeO62BUS_ME6zPuUx
|
8 |
+
|
9 |
+
# Using gradio for making a nice UI.
|
10 |
+
Youtube link version.
|
11 |
+
|
12 |
+
Installing requirements.
|
13 |
+
"""
|
14 |
+
|
15 |
+
# NOTE: the Colab shell escapes ("!pip install ...") that used to live here are
# IPython-only syntax and raise a SyntaxError when this file is run as a plain
# Python script. Install the dependencies before running instead (for example
# by listing them in requirements.txt):
#   gradio
#   git+https://github.com/huggingface/transformers
#   pytube
|
19 |
+
|
20 |
+
from pytube import YouTube
|
21 |
+
from transformers import pipeline
|
22 |
+
import gradio as gr
|
23 |
+
import os
|
24 |
+
|
25 |
+
from transformers import WhisperProcessor
|
26 |
+
|
27 |
+
# Load a WhisperProcessor from the "openai/whisper-small" checkpoint with
# language="Galician" and task="transcribe".
# NOTE(review): `processor` is not referenced by the functions visible in this
# file — confirm it is still needed.
processor = WhisperProcessor.from_pretrained("openai/whisper-small", language="Galician", task="transcribe")
|
28 |
+
|
29 |
+
|
30 |
+
from transformers import WhisperTokenizer
|
31 |
+
|
32 |
+
# Load a WhisperTokenizer from the "openai/whisper-small" checkpoint with
# language="Galician" and task="transcribe".
# NOTE(review): `tokenizer` is not referenced by the functions visible in this
# file — confirm it is still needed.
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="Galician", task="transcribe")
|
33 |
+
|
34 |
+
"""## Building a Demo
|
35 |
+
|
36 |
+
Now that we've fine-tuned our model we can build a demo to show
|
37 |
+
off its ASR capabilities! We'll make use of 🤗 Transformers
|
38 |
+
`pipeline`, which will take care of the entire ASR pipeline,
|
39 |
+
right from pre-processing the audio inputs to decoding the
|
40 |
+
model predictions.
|
41 |
+
|
42 |
+
Running the example below will generate a Gradio demo where we can input audio to
|
43 |
+
our fine-tuned Whisper model to transcribe the corresponding text:
|
44 |
+
"""
|
45 |
+
|
46 |
+
# Module-level Transformers pipeline built from the
# "Victorlopo21/whisper-medium-gl-30" model id (no explicit task is passed).
# transcribe_url() calls it with the path of the downloaded audio file.
pipe = pipeline(model="Victorlopo21/whisper-medium-gl-30") # change to "your-username/the-name-you-picked"
|
47 |
+
|
48 |
+
def get_audio(url):
    """Download the audio track of a YouTube video and return its local path.

    Args:
        url: Address of the YouTube video, handed to pytube's ``YouTube``.

    Returns:
        Path of the downloaded file (saved under the current directory) with
        its extension replaced by ``.wav``.

    Raises:
        IndexError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    audio_streams = yt.streams.filter(only_audio=True)
    stream_count = len(audio_streams)
    if stream_count == 0:
        raise IndexError(f"no audio-only stream available for {url!r}")
    # Keep the original preference for the second audio-only stream, but fall
    # back to the first one instead of failing when only one is offered.
    stream = audio_streams[1] if stream_count > 1 else audio_streams[0]
    out_file = stream.download(output_path=".")
    base, _ext = os.path.splitext(out_file)
    new_file = base + '.wav'
    # NOTE: only the extension changes here; the audio data is not transcoded.
    # os.replace overwrites a file left behind by an earlier request for the
    # same video on every platform, where os.rename would fail on Windows.
    os.replace(out_file, new_file)
    return new_file
|
57 |
+
|
58 |
+
def transcribe_url(url):
    """Return the transcription of the audio behind a YouTube URL.

    The audio is fetched with get_audio(), passed to the module-level
    ``pipe`` pipeline, and the 'text' entry of its result is returned.
    """
    audio_path = get_audio(url)
    result = pipe(audio_path)
    return result['text']
|
61 |
+
|
62 |
+
# Gradio UI definition: one text input is passed to transcribe_url and the
# value it returns is displayed as text.
iface = gr.Interface(
    title="Whisper Medium Galician",
    description="Realtime demo for Galician speech recognition using a fine-tuned Whisper medium model.",
    fn=transcribe_url,
    inputs='text',
    outputs="text",
)
|
69 |
+
|
70 |
+
# Start the Gradio app at import/run time, with debug=True passed to launch().
iface.launch(debug=True)
|
71 |
+
|
72 |
+
# Short youtube video to hear
|
73 |
+
# https://www.youtube.com/watch?v=Z2SjeZJZi6s&ab_channel=rimc7
|
74 |
+
|
75 |
+
# TO TRY
|