Commit 72e513f • 1 Parent(s): 2ab8d12
Update app.py
app.py CHANGED
@@ -13,13 +13,13 @@ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
 
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    "openai/whisper-large-
+    "openai/whisper-large-v3", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
 )
 distilled_model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    "distil-whisper/distil-large-
+    "distil-whisper/distil-large-v3", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
 )
 
-processor = AutoProcessor.from_pretrained("openai/whisper-large-
+processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
 
 model.to(device)
 distilled_model.to(device)
@@ -44,7 +44,7 @@ distil_pipe = pipeline(
     tokenizer=processor.tokenizer,
     feature_extractor=processor.feature_extractor,
     max_new_tokens=128,
-    chunk_length_s=
+    chunk_length_s=25,
     torch_dtype=torch_dtype,
     device=device,
     generate_kwargs={"language": "en", "task": "transcribe"},
@@ -115,13 +115,13 @@ if __name__ == "__main__":
         )
         gr.HTML(
             f"""
-            <p><a href="https://huggingface.co/distil-whisper/distil-large-
-            of the <a href="https://huggingface.co/openai/whisper-large-
+            <p><a href="https://huggingface.co/distil-whisper/distil-large-v3"> Distil-Whisper</a> is a distilled variant
+            of the <a href="https://huggingface.co/openai/whisper-large-v3"> Whisper</a> model by OpenAI. Compared to Whisper,
             Distil-Whisper runs 6x faster with 50% fewer parameters, while performing to within 1% word error rate (WER) on
             out-of-distribution evaluation data.</p>
 
             <p>In this demo, we perform a speed comparison between Whisper and Distil-Whisper in order to test this claim.
-            Both models use the <a href="https://huggingface.co/distil-whisper/distil-large-
+            Both models use the <a href="https://huggingface.co/distil-whisper/distil-large-v3#chunked-long-form-transcription"> chunked long-form transcription algorithm</a>
             in 🤗 Transformers, as well as Flash Attention. To use Distil-Whisper yourself, check the code examples on the
             <a href="https://github.com/huggingface/distil-whisper#1-usage"> Distil-Whisper repository</a>. To ensure fair
             usage of the Space, we ask that audio file inputs are kept to < 30 mins.</p>
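For reference, below is a minimal standalone sketch of the setup this commit moves to: the large-v3 checkpoints, Flash Attention 2 with an SDPA fallback, and chunked long-form transcription through the 🤗 Transformers pipeline. The checkpoint names, dtype/attention selection, and pipeline arguments mirror the hunks above; the script layout and the "sample.wav" input are illustrative placeholders, not code from the Space.

```python
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from transformers.utils import is_flash_attn_2_available

device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# Prefer Flash Attention 2 when the kernel is installed, otherwise fall back to PyTorch SDPA.
attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"

# Distil-Whisper large-v3: the distilled checkpoint this commit switches to.
distilled_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    "distil-whisper/distil-large-v3",
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    attn_implementation=attn_implementation,
)
distilled_model.to(device)

# The processor (tokenizer + feature extractor) is shared with the teacher checkpoint.
processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")

distil_pipe = pipeline(
    "automatic-speech-recognition",
    model=distilled_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=25,  # chunk length this commit sets for chunked long-form transcription
    torch_dtype=torch_dtype,
    device=device,
    generate_kwargs={"language": "en", "task": "transcribe"},
)

# "sample.wav" is a placeholder audio file path.
print(distil_pipe("sample.wav")["text"])
```

Loading the full openai/whisper-large-v3 model for the speed comparison follows the same pattern, swapping in the teacher checkpoint name.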