amritsar committed on
Commit
cd70190
·
verified ·
1 Parent(s): b581197

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -34
app.py CHANGED
@@ -1,46 +1,33 @@
1
  import gradio as gr
2
- from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
- from pytube import YouTube
4
- import torch
5
- import os
6
 
7
- # Load Whisper model
8
- model_id = "openai/whisper-large-v2"
9
- processor = WhisperProcessor.from_pretrained(model_id)
10
- model = WhisperForConditionalGeneration.from_pretrained(model_id)
11
 
12
- def transcribe_youtube_video(youtube_url):
 
 
 
 
 
 
 
13
  try:
14
- # Download audio from YouTube
15
- yt = YouTube(youtube_url)
16
- audio_stream = yt.streams.filter(only_audio=True).first()
17
- audio_file_path = audio_stream.download(filename="audio.mp4")
18
-
19
- # Load and preprocess the audio
20
- import librosa
21
- audio, _ = librosa.load(audio_file_path, sr=16000)
22
- input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
23
-
24
- # Generate token ids
25
- predicted_ids = model.generate(input_features)
26
-
27
- # Decode token ids to text
28
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
29
-
30
- # Clean up
31
- os.remove(audio_file_path)
32
-
33
- return transcription
34
  except Exception as e:
35
  return f"Error: {str(e)}"
36
 
37
  # Gradio interface
38
  iface = gr.Interface(
39
- fn=transcribe_youtube_video,
40
- inputs=gr.Textbox(label="YouTube Video URL"),
41
- outputs=gr.Textbox(label="Transcription"),
42
- title="YouTube Video Transcription",
43
- description="Enter a YouTube video URL to transcribe its audio."
44
  )
45
 
46
  iface.launch()
 
1
  import gradio as gr
2
+ from transformers import Wav2Vec2BertForCTC, pipeline
3
+ from m4t_processor_with_lm import M4TProcessorWithLM
 
 
4
 
5
+ # Load the Punjabi ASR model
6
+ model_id = 'kdcyberdude/w2v-bert-punjabi'
7
+ processor = M4TProcessorWithLM.from_pretrained(model_id)
8
+ model = Wav2Vec2BertForCTC.from_pretrained(model_id)
9
 
10
+ pipe = pipeline('automatic-speech-recognition',
11
+ model=model,
12
+ tokenizer=processor.tokenizer,
13
+ feature_extractor=processor.feature_extractor,
14
+ decoder=processor.decoder,
15
+ return_timestamps='word')
16
+
17
+ def transcribe_audio(audio_file):
18
  try:
19
+ output = pipe(audio_file, chunk_length_s=20, stride_length_s=(4, 4))
20
+ return output['text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  except Exception as e:
22
  return f"Error: {str(e)}"
23
 
24
  # Gradio interface
25
  iface = gr.Interface(
26
+ fn=transcribe_audio,
27
+ inputs=gr.Audio(source="upload", type="filepath"),
28
+ outputs=gr.Textbox(label="Punjabi Transcription"),
29
+ title="Punjabi Audio Transcription",
30
+ description="Upload an audio file to transcribe Punjabi speech."
31
  )
32
 
33
  iface.launch()