sanchit-gandhi committed
Commit 72e513f • 1 Parent(s): 2ab8d12

Update app.py

Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -13,13 +13,13 @@ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
 
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    "openai/whisper-large-v2", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
+    "openai/whisper-large-v3", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
 )
 distilled_model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    "distil-whisper/distil-large-v2", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
+    "distil-whisper/distil-large-v3", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
 )
 
-processor = AutoProcessor.from_pretrained("openai/whisper-large-v2")
+processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
 
 model.to(device)
 distilled_model.to(device)
@@ -44,7 +44,7 @@ distil_pipe = pipeline(
     tokenizer=processor.tokenizer,
     feature_extractor=processor.feature_extractor,
     max_new_tokens=128,
-    chunk_length_s=15,
+    chunk_length_s=25,
     torch_dtype=torch_dtype,
     device=device,
     generate_kwargs={"language": "en", "task": "transcribe"},
@@ -115,13 +115,13 @@ if __name__ == "__main__":
     )
     gr.HTML(
         f"""
-        <p><a href="https://huggingface.co/distil-whisper/distil-large-v2"> Distil-Whisper</a> is a distilled variant
-        of the <a href="https://huggingface.co/openai/whisper-large-v2"> Whisper</a> model by OpenAI. Compared to Whisper,
+        <p><a href="https://huggingface.co/distil-whisper/distil-large-v3"> Distil-Whisper</a> is a distilled variant
+        of the <a href="https://huggingface.co/openai/whisper-large-v3"> Whisper</a> model by OpenAI. Compared to Whisper,
         Distil-Whisper runs 6x faster with 50% fewer parameters, while performing to within 1% word error rate (WER) on
         out-of-distribution evaluation data.</p>
 
         <p>In this demo, we perform a speed comparison between Whisper and Distil-Whisper in order to test this claim.
-        Both models use the <a href="https://huggingface.co/distil-whisper/distil-large-v2#long-form-transcription"> chunked long-form transcription algorithm</a>
+        Both models use the <a href="https://huggingface.co/distil-whisper/distil-large-v3#chunked-long-form-transcription"> chunked long-form transcription algorithm</a>
         in 🤗 Transformers, as well as Flash Attention. To use Distil-Whisper yourself, check the code examples on the
         <a href="https://github.com/huggingface/distil-whisper#1-usage"> Distil-Whisper repository</a>. To ensure fair
         usage of the Space, we ask that audio file inputs are kept to < 30 mins.</p>
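
For reference, the Distil-Whisper half of the updated app.py corresponds roughly to the standalone sketch below, using the checkpoints and pipeline settings this commit introduces. The pipeline task string, device selection, and audio path are filled in for illustration and are not shown in the diff.

# Minimal sketch of the updated Distil-Whisper setup (not the Space's full app.py)
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from transformers.utils import is_flash_attn_2_available

device = "cuda:0" if torch.cuda.is_available() else "cpu"  # assumption: device string not shown in the diff
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# Fall back to PyTorch SDPA when Flash Attention 2 is not installed
attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"

distilled_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    "distil-whisper/distil-large-v3",
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    attn_implementation=attn_implementation,
)
distilled_model.to(device)

# distil-large-v3 shares the Whisper large-v3 tokenizer and feature extractor
processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")

distil_pipe = pipeline(
    "automatic-speech-recognition",  # assumption: task string implied by the Space, not shown in the hunk
    model=distilled_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=25,  # bumped from 15 s to 25 s in this commit
    torch_dtype=torch_dtype,
    device=device,
    generate_kwargs={"language": "en", "task": "transcribe"},
)

result = distil_pipe("sample.wav")  # "sample.wav" is a placeholder audio file
print(result["text"])

The Whisper baseline in the Space is loaded the same way, with "openai/whisper-large-v3" in place of the distilled checkpoint, so the two pipelines differ only in the model they wrap.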