sanchit-gandhi (HF staff) committed
Commit 2fb38c9
1 Parent(s): 4b48f59

Update app.py

Files changed (1)
  1. app.py +6 -4
app.py CHANGED
@@ -79,7 +79,7 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
 
 
 def format_as_transcription(raw_segments):
-    return "\n".join(
+    return "\n\n".join(
         [
             f"{chunk['speaker']} [{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
             for chunk in raw_segments
@@ -247,7 +247,9 @@ def transcribe_yt(yt_url, task="transcribe", group_by_speaker=True, progress=gr.
 
 title = "Whisper JAX + Speaker Diarization ⚡️"
 
-description = """Combine the speed of Whisper JAX with pyannote speaker diarization to transcribe meetings in super fast time.
+description = """Combine the speed of Whisper JAX with pyannote speaker diarization to transcribe meetings in super fast time. Demo uses Whisper JAX as an [endpoint](https://twitter.com/sanchitgandhi99/status/1656665496463495168) and pyannote speaker diarization running locally. The Whisper JAX endpoint is run asynchronously, meaning speaker diarization is run in parallel to the speech transcription. The diarized timestamps are aligned with the Whisper output to give the final speaker-segmented transcription.
+
+To duplicate the demo, first accept the pyannote terms of use for the [speaker diarization](https://huggingface.co/pyannote/speaker-diarization) and [segmentation](https://huggingface.co/pyannote/segmentation) models. Then, click [here](https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-diarization?duplicate=true) to duplicate the demo, and enter your Hugging Face access token as a Space secret when prompted.
 """
 
 article = "Whisper large-v2 model by OpenAI. Speaker diarization model by pyannote. Whisper JAX backend running JAX on a TPU v4-8 through the generous support of the [TRC](https://sites.research.google/trc/about/) programme. Whisper JAX [code](https://github.com/sanchit-gandhi/whisper-jax) and Gradio demo by 🤗 Hugging Face."
@@ -297,8 +299,8 @@ youtube = gr.Interface(
     ],
     allow_flagging="never",
     title=title,
-    examples=[["https://www.youtube.com/watch?v=m8u-18Q0s7I", True]],
-    cache_examples=False,
+    examples=[["https://www.youtube.com/watch?v=m8u-18Q0s7I", "transcribe", True]],
+    cache_examples=True,
     description=description,
     article=article,
 )
 
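The first hunk switches the join separator from "\n" to "\n\n", so the rendered transcription leaves a blank line between consecutive speaker turns. A minimal, self-contained sketch of that behaviour (the simplified format_timestamp below is only a stand-in for the helper defined near the top of app.py, and the sample raw_segments is made up):

def format_timestamp(seconds: float) -> str:
    # simplified stand-in for the format_timestamp helper in app.py
    minutes, secs = divmod(seconds, 60)
    return f"{int(minutes):02d}:{secs:05.2f}"


def format_as_transcription(raw_segments):
    # one "SPEAKER [start -> end] text" line per chunk, now separated by a blank line
    return "\n\n".join(
        f"{chunk['speaker']} [{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
        for chunk in raw_segments
    )


raw_segments = [
    {"speaker": "SPEAKER_00", "timestamp": (0.0, 4.2), "text": "Hello and welcome."},
    {"speaker": "SPEAKER_01", "timestamp": (4.2, 7.9), "text": "Thanks for having me."},
]
print(format_as_transcription(raw_segments))
# SPEAKER_00 [00:00.00 -> 00:04.20] Hello and welcome.
#
# SPEAKER_01 [00:04.20 -> 00:07.90] Thanks for having me.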
 
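The expanded description documents the architecture: transcription is sent to a remote Whisper JAX endpoint while pyannote diarization runs locally, and the endpoint call is non-blocking so the two run in parallel. A rough sketch of that pattern, assuming a placeholder endpoint Space, API route, and call signature, and a Space secret named HF_TOKEN (the actual values live in app.py):

import os

from gradio_client import Client
from pyannote.audio import Pipeline

# placeholder endpoint Space and gated pyannote pipeline; the token comes from a Space secret
whisper_client = Client("sanchit-gandhi/whisper-jax")
diarization_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization", use_auth_token=os.environ["HF_TOKEN"]
)


def transcribe_and_diarize(audio_path: str):
    # submit() returns immediately, so the remote Whisper JAX transcription runs asynchronously ...
    job = whisper_client.submit(audio_path, "transcribe", True, api_name="/predict")
    # ... while speaker diarization runs locally in parallel
    diarization = diarization_pipeline(audio_path)
    transcription = job.result()  # block only once the local diarization is done
    # app.py then aligns the diarized speaker turns with the Whisper timestamps
    return transcription, diarization

The gated pyannote checkpoints only download once their terms of use have been accepted on the Hub, which is why the duplication instructions in the new description ask for a Hugging Face access token as a Space secret.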
 
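The description also says the diarized timestamps are aligned with the Whisper output to give the final speaker-segmented transcription. One simple way to do that alignment (a sketch only, assuming the pyannote turns have already been converted to plain dicts; app.py's exact matching logic may differ) is to give each transcription chunk the speaker whose turn overlaps it most:

def assign_speakers(transcription_chunks, speaker_turns):
    # transcription_chunks: [{"timestamp": (start, end), "text": ...}, ...] from Whisper
    # speaker_turns: [{"speaker": ..., "start": ..., "end": ...}, ...] from pyannote
    segments = []
    for chunk in transcription_chunks:
        start, end = chunk["timestamp"]
        # pick the speaker turn with the largest temporal overlap with this chunk
        best = max(
            speaker_turns,
            key=lambda turn: max(0.0, min(end, turn["end"]) - max(start, turn["start"])),
        )
        segments.append({"speaker": best["speaker"], "timestamp": (start, end), "text": chunk["text"]})
    return segments

The result has the same shape as the raw_segments that format_as_transcription consumes in the sketch further up.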
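The last hunk makes the cached example supply one value per input component (URL, task, group-by-speaker) and turns on example caching, so the demo clip is transcribed once when the Space builds rather than on every click. A trimmed sketch of the pattern with placeholder components and a dummy function (the real ones are defined earlier in app.py):

import gradio as gr


def transcribe_yt(yt_url: str, task: str, group_by_speaker: bool) -> str:
    # dummy stand-in for the real transcription function in app.py
    return f"{task} {yt_url} (group_by_speaker={group_by_speaker})"


youtube = gr.Interface(
    fn=transcribe_yt,
    # placeholder components standing in for the ones defined in app.py
    inputs=[
        gr.Textbox(label="YouTube URL"),
        gr.Radio(["transcribe", "translate"], label="Task"),
        gr.Checkbox(label="Group by speaker"),
    ],
    outputs=gr.Textbox(label="Transcription"),
    allow_flagging="never",
    # one value per input component, in the same order as `inputs`
    examples=[["https://www.youtube.com/watch?v=m8u-18Q0s7I", "transcribe", True]],
    # pre-compute the example output when the Space builds, instead of on demand
    cache_examples=True,
)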