VinayHajare committed on
Commit
c99e855
·
1 Parent(s): ae1d5fa

Update app.py

Browse files

Updated app.py and fixed some bugs occurring while transcribing YouTube videos

Files changed (1) hide show
  1. app.py +19 -10
app.py CHANGED
@@ -38,7 +38,6 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
38
  # we have a malformed timestamp so just return it as is
39
  return seconds
40
 
41
-
42
  def transcribe(file, task, return_timestamps):
43
  outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
44
  text = outputs["text"]
@@ -59,14 +58,20 @@ def _return_yt_html_embed(yt_url):
59
  )
60
  return HTML_str
61
 
62
- def yt_transcribe(yt_url):
63
  yt = pt.YouTube(yt_url)
64
  html_embed_str = _return_yt_html_embed(yt_url)
65
  stream = yt.streams.filter(only_audio=True)[0]
66
  stream.download(filename="audio.mp3")
67
-
68
- text = pipe("audio.mp3")["text"]
69
-
 
 
 
 
 
 
70
  return html_embed_str, text
71
 
72
  demo = gr.Blocks()
@@ -84,7 +89,7 @@ mic_transcribe = gr.Interface(
84
  title="Whisper Demo: Transcribe Marathi Audio",
85
  description=(
86
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
87
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
88
  " of arbitrary length."
89
  ),
90
  allow_flagging="never",
@@ -103,7 +108,7 @@ file_transcribe = gr.Interface(
103
  title="Whisper Demo: Transcribe Marathi Audio",
104
  description=(
105
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
106
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
107
  " of arbitrary length."
108
  ),
109
  cache_examples=True,
@@ -112,20 +117,24 @@ file_transcribe = gr.Interface(
112
 
113
  yt_transcribe = gr.Interface(
114
  fn=yt_transcribe,
115
- inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
 
 
 
 
116
  outputs=["html", "text"],
117
  layout="horizontal",
118
  theme="huggingface",
119
  title="Whisper Demo: Transcribe Marathi YouTube Video",
120
  description=(
121
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
122
- f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
123
  " arbitrary length."
124
  ),
125
  allow_flagging="never",
126
  )
127
 
128
  with demo:
129
- gr.TabbedInterface([mic_transcribe, file_transcribe,yt_transcribe], ["Transcribe Microphone", "Transcribe Audio File", "Transcribe YouTube Video"])
130
 
131
  demo.launch(enable_queue=True)
 
38
  # we have a malformed timestamp so just return it as is
39
  return seconds
40
 
 
41
  def transcribe(file, task, return_timestamps):
42
  outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
43
  text = outputs["text"]
 
58
  )
59
  return HTML_str
60
 
61
+ def yt_transcribe(yt_url, task, return_timestamps):
62
  yt = pt.YouTube(yt_url)
63
  html_embed_str = _return_yt_html_embed(yt_url)
64
  stream = yt.streams.filter(only_audio=True)[0]
65
  stream.download(filename="audio.mp3")
66
+ outputs = pipe("audio.mp3",batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
67
+ text = outputs["text"]
68
+ if return_timestamps:
69
+ timestamps = outputs["chunks"]
70
+ timestamps = [
71
+ f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
72
+ for chunk in timestamps
73
+ ]
74
+ text = "\n".join(str(feature) for feature in timestamps)
75
  return html_embed_str, text
76
 
77
  demo = gr.Blocks()
 
89
  title="Whisper Demo: Transcribe Marathi Audio",
90
  description=(
91
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
92
+ f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
93
  " of arbitrary length."
94
  ),
95
  allow_flagging="never",
 
108
  title="Whisper Demo: Transcribe Marathi Audio",
109
  description=(
110
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
111
+ f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
112
  " of arbitrary length."
113
  ),
114
  cache_examples=True,
 
117
 
118
  yt_transcribe = gr.Interface(
119
  fn=yt_transcribe,
120
+ inputs=[
121
+ gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube Video URL"),
122
+ gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
123
+ gr.inputs.Checkbox(default=False, label="Return timestamps"),
124
+ ],
125
  outputs=["html", "text"],
126
  layout="horizontal",
127
  theme="huggingface",
128
  title="Whisper Demo: Transcribe Marathi YouTube Video",
129
  description=(
130
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
131
+ f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
132
  " arbitrary length."
133
  ),
134
  allow_flagging="never",
135
  )
136
 
137
  with demo:
138
+ gr.TabbedInterface([mic_transcribe, file_transcribe, yt_transcribe], ["Transcribe Microphone", "Transcribe Audio File", "Transcribe YouTube Video"])
139
 
140
  demo.launch(enable_queue=True)