k4d3
/

toolkit

Safetensors

Model card Files Files and versions Community

k4d3 committed on Oct 6, 2024

Commit

7174111

•

1 Parent(s): 1f9dab2

whisper2

Browse files

Signed-off-by: Balazs Horvath <acsipont@gmail.com>

Files changed (1) hide show

whisper2 +72 -0

whisper2 ADDED Viewed

	@@ -0,0 +1,72 @@

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+import sys
+import os
+import warnings
+# Silence every FutureWarning and UserWarning for the whole process.
+for _ignored_category in (FutureWarning, UserWarning):
+    warnings.filterwarnings("ignore", category=_ignored_category)
+# Checkpoint to load and the batch size handed to the pipeline.
+MODEL_NAME = "openai/whisper-large-v3"
+BATCH_SIZE = 8
+# Use the first CUDA device with float16 when CUDA is available;
+# otherwise fall back to the CPU with float32.
+if torch.cuda.is_available():
+    device = "cuda:0"
+    torch_dtype = torch.float16
+else:
+    device = "cpu"
+    torch_dtype = torch.float32
+# Load the checkpoint in the selected dtype, then move it to the device.
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    MODEL_NAME,
+    torch_dtype=torch_dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True,
+)
+model.to(device)
+# The processor supplies both the tokenizer and the feature extractor.
+processor = AutoProcessor.from_pretrained(MODEL_NAME)
+# NOTE: max_new_tokens is intentionally not passed to the pipeline.
+_pipeline_options = {
+    "model": model,
+    "tokenizer": processor.tokenizer,
+    "feature_extractor": processor.feature_extractor,
+    "chunk_length_s": 30,
+    "batch_size": BATCH_SIZE,
+    "return_timestamps": True,
+    "torch_dtype": torch_dtype,
+    "device": device,
+}
+pipe = pipeline("automatic-speech-recognition", **_pipeline_options)
+def transcribe(audio_file_path, task="transcribe"):
+    """Run the module-level ASR pipeline on a single audio file.
+
+    The complete pipeline result is pretty-printed to stdout before the
+    text is returned.
+
+    Args:
+        audio_file_path: Path of the audio file to process.
+        task: Value forwarded to the pipeline as the ``task`` entry of
+            ``generate_kwargs``. Defaults to ``"transcribe"``.
+
+    Returns:
+        The ``"text"`` entry of the pipeline result, or ``None`` when the
+        path does not exist or an exception is raised while transcribing.
+        An error message is printed in both failure cases.
+    """
+    from pprint import pprint
+    path_is_missing = not os.path.exists(audio_file_path)
+    if path_is_missing:
+        print(f"Error: The file '{audio_file_path}' does not exist.")
+        return None
+    generation_options = {"task": task}
+    try:
+        # Inference only, so gradient tracking is switched off.
+        with torch.no_grad():
+            output = pipe(audio_file_path, generate_kwargs=generation_options)
+        pprint(output)
+        transcript = output["text"]
+    except Exception as exc:
+        print(f"Error during transcription: {str(exc)}")
+        return None
+    return transcript
+if __name__ == "__main__":
+    # Command-line entry point: <audio_file_path> [task].
+    if len(sys.argv) < 2:
+        print("Usage: python script.py <audio_file_path> [task]")
+        print("task can be 'transcribe' or 'translate' (default is 'transcribe')")
+        sys.exit(1)
+    audio_file_path = sys.argv[1]
+    task = sys.argv[2] if len(sys.argv) > 2 else "transcribe"
+    if task not in ("transcribe", "translate"):
+        print("Error: task must be either 'transcribe' or 'translate'")
+        sys.exit(1)
+    result = transcribe(audio_file_path, task)
+    # transcribe() returns None on failure and has already printed the
+    # reason; report that to the calling shell with a non-zero status.
+    if result is None:
+        sys.exit(1)
+    # Test against None rather than truthiness so that an empty
+    # transcription is still reported instead of being silently dropped.
+    print("Transcription result:")
+    print(result)