Update app.py
app.py CHANGED
@@ -1,55 +1,55 @@
-# pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"thak123/whisper-small-gom",
-#                 task="automatic-speech-recognition", tokenizer= tokenizer) # change to "your-username/the-name-you-picked"
-
-# # pipe.model.config.forced_decoder_ids = (
-# #     pipe.tokenizer.get_decoder_prompt_ids(
-# #         language="marathi", task="transcribe"
-# #     )
-# # )
-
-# def transcribe_speech(filepath):
-#     output = pipe(
-#         filepath,
-#         max_new_tokens=256,
-#         generate_kwargs={
-#             "task": "transcribe",
-#             "language": "konkani",
-#         }, # update with the language you've fine-tuned on
-#         chunk_length_s=30,
-#         batch_size=8,
-#         padding=True
-#     )
-#     return output["text"]
-
-#
+from transformers import WhisperTokenizer
+import os
+tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small") #, language="marathi", task="transcribe"
+
+from transformers import pipeline
+import gradio as gr
+import torch
+
+pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"thak123/whisper-small-gom",
+                task="automatic-speech-recognition", tokenizer= tokenizer) # change to "your-username/the-name-you-picked"
+
+# pipe.model.config.forced_decoder_ids = (
+#     pipe.tokenizer.get_decoder_prompt_ids(
+#         language="marathi", task="transcribe"
+#     )
+# )
+
+def transcribe_speech(filepath):
+    output = pipe(
+        filepath,
+        max_new_tokens=256,
+        generate_kwargs={
+            "task": "transcribe",
+            "language": "konkani",
+        }, # update with the language you've fine-tuned on
+        chunk_length_s=30,
+        batch_size=8,
+        # padding=True
+    )
+    return output["text"]
+
+demo = gr.Blocks()
+
+mic_transcribe = gr.Interface(
+    fn=transcribe_speech,
+    inputs=gr.Audio(sources="microphone", type="filepath"),
+    outputs=gr.components.Textbox(),
+)
+
+file_transcribe = gr.Interface(
+    fn=transcribe_speech,
+    inputs=gr.Audio(sources="upload", type="filepath"),
+    outputs=gr.components.Textbox(),
+)
+
+with demo:
+    gr.TabbedInterface(
+        [mic_transcribe, file_transcribe],
+        ["Transcribe Microphone", "Transcribe Audio File"],
+    )
+
+demo.launch(debug=True)
 
 # # def transcribe(audio):
 # # # text = pipe(audio)["text"]
@@ -75,31 +75,31 @@
 # # iface.launch()
 
 
-from transformers import WhisperTokenizer, pipeline
-import gradio as gr
-import os
 
-tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="marathi", task="transcribe")
-
-pipe = pipeline(model="thak123/gom-stt-v3", task="automatic-speech-recognition", tokenizer=tokenizer)
-
-def transcribe(audio):
-    result = pipe(audio)
-    text = result[0]['text']
-    print("op", text)
-    return text
-
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=[gr.Audio(sources=["microphone", "upload"])],
-    outputs="text",
-    examples=[
-        [os.path.join(os.path.dirname("."), "audio/chalyaami.mp3")],
-        [os.path.join(os.path.dirname("."), "audio/ekdonteen.flac")],
-        [os.path.join(os.path.dirname("."), "audio/heyatachadjaale.mp3")],
-    ],
-    title="Whisper Konkani",
-    description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
-)
-
-iface.launch()
+# from transformers import WhisperTokenizer, pipeline
+# import gradio as gr
+# import os
 
+# tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="marathi", task="transcribe")
+
+# pipe = pipeline(model="thak123/gom-stt-v3", task="automatic-speech-recognition", tokenizer=tokenizer)
+
+# def transcribe(audio):
+#     result = pipe(audio)
+#     text = result[0]['text']
+#     print("op", text)
+#     return text
+
+# iface = gr.Interface(
+#     fn=transcribe,
+#     inputs=[gr.Audio(sources=["microphone", "upload"])],
+#     outputs="text",
+#     examples=[
+#         [os.path.join(os.path.dirname("."), "audio/chalyaami.mp3")],
+#         [os.path.join(os.path.dirname("."), "audio/ekdonteen.flac")],
+#         [os.path.join(os.path.dirname("."), "audio/heyatachadjaale.mp3")],
+#     ],
+#     title="Whisper Konkani",
+#     description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
+# )
 
+# iface.launch()
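Quick smoke test (not part of the commit): a minimal sketch for exercising the fine-tuned checkpoint outside the Gradio UI. The model ID is taken from the diff; the sample clip path is the one listed in the commented-out examples and is assumed to exist in the repo, along with transformers, torch, and ffmpeg being installed.

from transformers import pipeline

# Load the fine-tuned Konkani Whisper checkpoint used by app.py.
asr = pipeline("automatic-speech-recognition", model="thak123/gom-stt-v3")

# Transcribe one of the sample clips referenced in the old example list
# (assumes audio/ekdonteen.flac is present and ffmpeg can decode it).
result = asr("audio/ekdonteen.flac", chunk_length_s=30)
print(result["text"])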