amritsar committed on
Commit
b32079a
·
verified ·
1 Parent(s): 5d70d44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -21
app.py CHANGED
@@ -1,12 +1,13 @@
1
  import gradio as gr
2
- from transformers import AutoProcessor, SeamlessM4Tv2Model
3
  from pytube import YouTube
 
4
  import os
5
 
6
- # Load the SeamlessM4T model and processor for Punjabi transcription
7
- model_id = "facebook/seamless-m4t-v2-large"
8
- processor = AutoProcessor.from_pretrained(model_id)
9
- model = SeamlessM4Tv2Model.from_pretrained(model_id)
10
 
11
  def transcribe_youtube_video(youtube_url):
12
  try:
@@ -14,31 +15,32 @@ def transcribe_youtube_video(youtube_url):
14
  yt = YouTube(youtube_url)
15
  audio_stream = yt.streams.filter(only_audio=True).first()
16
  audio_file_path = audio_stream.download(filename="audio.mp4")
17
-
18
- # Process the downloaded audio
19
- audio_inputs = processor(audios=audio_file_path, return_tensors="pt")
20
 
21
- # Generate transcription
22
- output = model.generate(**audio_inputs)
23
-
24
- # Decode the output to get transcription text
25
- transcription = processor.decode(output[0], skip_special_tokens=True)
26
 
27
- # Clean up by removing the downloaded audio file
 
 
 
 
 
 
28
  os.remove(audio_file_path)
29
-
30
- return transcription
31
-
32
  except Exception as e:
33
  return f"Error: {str(e)}"
34
 
35
- # Gradio interface for inputting a YouTube URL
36
  iface = gr.Interface(
37
  fn=transcribe_youtube_video,
38
  inputs=gr.Textbox(label="YouTube Video URL"),
39
- outputs=gr.Textbox(label="Punjabi Transcription"),
40
- title="Punjabi YouTube Video Transcription",
41
- description="Enter a YouTube video URL to download and transcribe its Punjabi audio."
42
  )
43
 
44
  iface.launch()
 
1
  import gradio as gr
2
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
  from pytube import YouTube
4
+ import torch
5
  import os
6
 
7
# Load the Whisper checkpoint and its matching processor once, at import
# time; transcribe_youtube_video reuses these module-level objects.
model_id = "openai/whisper-small"
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)
11
 
def _transcribe_waveform(audio, sampling_rate=16000, chunk_seconds=30):
    """Transcribe a waveform window by window and join the resulting text.

    The Whisper feature extractor pads or truncates each input to a single
    30-second window, so feeding a longer waveform in one call would drop
    everything after the first 30 seconds. Splitting the samples into
    fixed-size windows keeps the whole recording.

    Args:
        audio: 1-D sequence of audio samples (as returned by librosa.load).
        sampling_rate: Sample rate of ``audio`` in Hz.
        chunk_seconds: Length of each window handed to the model.

    Returns:
        The transcriptions of all windows joined by single spaces; an empty
        string when ``audio`` has no samples.
    """
    chunk_size = sampling_rate * chunk_seconds
    pieces = []
    for start in range(0, len(audio), chunk_size):
        chunk = audio[start:start + chunk_size]
        input_features = processor(
            chunk, sampling_rate=sampling_rate, return_tensors="pt"
        ).input_features

        # Generate token ids, then decode them to text.
        predicted_ids = model.generate(input_features)
        text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

        text = text.strip()
        if text:
            pieces.append(text)
    return " ".join(pieces)


def transcribe_youtube_video(youtube_url):
    """Download a YouTube video's audio track and return its transcription.

    Args:
        youtube_url: URL of the video, as entered in the Gradio textbox.

    Returns:
        The transcribed text, or a string starting with ``"Error: "`` when
        the URL is empty or any step fails. Failures are returned rather
        than raised so the message is shown in the output textbox.
    """
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return "Error: Please enter a YouTube video URL."

    audio_file_path = None
    try:
        yt = YouTube(youtube_url.strip())
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # .first() yields None when the filter matches nothing; fail with
            # a readable message instead of an AttributeError on None.
            raise ValueError("No audio-only stream is available for this video.")
        audio_file_path = audio_stream.download(filename="audio.mp4")

        # Load and preprocess the audio. librosa is imported here rather than
        # at module level, so it is only needed once a request is made.
        import librosa
        audio, _ = librosa.load(audio_file_path, sr=16000)

        return _transcribe_waveform(audio, sampling_rate=16000)
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Clean up on every path, including failures after the download.
        if audio_file_path is not None:
            try:
                os.remove(audio_file_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not replace the
                # transcription (or the original error) being returned.
                pass
36
 
37
# Gradio UI: one textbox for the video URL in, one textbox for the
# transcription (or error message) out.
iface = gr.Interface(
    fn=transcribe_youtube_video,
    inputs=gr.Textbox(label="YouTube Video URL"),
    outputs=gr.Textbox(label="Transcription"),
    title="YouTube Video Transcription",
    description="Enter a YouTube video URL to transcribe its audio.",
)

# Start the web app.
iface.launch()