Spaces:

oza75
/

bambara-asr

Running on Zero

App Files Community

Aboubacar OUATTARA - kaira committed on Apr 26

Commit

54440ac

•

1 Parent(s): 7dc5f48

initial commit

Browse files

Files changed (4) hide show

app.py +50 -0
bambara_utils.py +46 -0
packages.txt +1 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import spaces
+import torch
+from transformers import pipeline
+import gradio as gr
+from bambara_utils import BambaraWhisperTokenizer
+# Determine the appropriate device (GPU or CPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Define the model checkpoint and language
+model_checkpoint = "oza75/whisper-bambara-asr-001"
+language = "bambara"
+# Load the custom tokenizer designed for Bambara and the ASR model
+tokenizer = BambaraWhisperTokenizer.from_pretrained(model_checkpoint, language=language, device=device)
+pipe = pipeline(model=model_checkpoint, tokenizer=tokenizer, device=device)
+@spaces.GPU()
+def transcribe(audio):
+    """
+    Transcribes the provided audio file into text using the configured ASR pipeline.
+    Args:
+        audio: The path to the audio file to transcribe.
+    Returns:
+        A string representing the transcribed text.
+    """
+    # Use the pipeline to perform transcription
+    text = pipe(audio)["text"]
+    return text
+def main():
+    # Setup Gradio interface
+    iface = gr.Interface(
+        fn=transcribe,
+        inputs=gr.Audio(type="filepath"),
+        outputs="text",
+        title="Bambara Automatic Speech Recognition",
+        description="Realtime demo for Bambara speech recognition based on a fine-tuning of the Whisper model."
+    )
+    # Launch the interface
+    iface.launch(share=False)
+# Start the demo only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()

bambara_utils.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from typing import List
+from tokenizers import AddedToken
+from transformers import WhisperTokenizer, WhisperProcessor
+from transformers.models.whisper.tokenization_whisper import TO_LANGUAGE_CODE, TASK_IDS
+CUSTOM_TO_LANGUAGE_CODE = {**TO_LANGUAGE_CODE, "bambara": "bm"}
+class BambaraWhisperTokenizer(WhisperTokenizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.add_tokens(AddedToken(content="<|bm|>", lstrip=False, rstrip=False, normalized=False, special=True))
+    @property
+    def prefix_tokens(self) -> List[int]:
+        bos_token_id = self.convert_tokens_to_ids("<|startoftranscript|>")
+        translate_token_id = self.convert_tokens_to_ids("<|translate|>")
+        transcribe_token_id = self.convert_tokens_to_ids("<|transcribe|>")
+        notimestamps_token_id = self.convert_tokens_to_ids("<|notimestamps|>")
+        if self.language is not None:
+            self.language = self.language.lower()
+            if self.language in CUSTOM_TO_LANGUAGE_CODE:
+                language_id = CUSTOM_TO_LANGUAGE_CODE[self.language]
+            elif self.language in CUSTOM_TO_LANGUAGE_CODE.values():
+                language_id = self.language
+            else:
+                is_language_code = len(self.language) == 2
+                raise ValueError(
+                    f"Unsupported language: {self.language}. Language should be one of:"
+                    f" {list(CUSTOM_TO_LANGUAGE_CODE.values()) if is_language_code else list(CUSTOM_TO_LANGUAGE_CODE.keys())}."
+                )
+        if self.task is not None:
+            if self.task not in TASK_IDS:
+                raise ValueError(f"Unsupported task: {self.task}. Task should be in: {TASK_IDS}")
+        bos_sequence = [bos_token_id]
+        if self.language is not None:
+            bos_sequence.append(self.convert_tokens_to_ids(f"<|{language_id}|>"))
+        if self.task is not None:
+            bos_sequence.append(transcribe_token_id if self.task == "transcribe" else translate_token_id)
+        if not self.predict_timestamps:
+            bos_sequence.append(notimestamps_token_id)
+        return bos_sequence

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+datasets[audio]
+transformers
+accelerate
+evaluate
+jiwer
+tensorboard
+gradio
+spaces