# ------------------------------------------------------------------
# Earlier version (kept for reference): tabbed Gradio Blocks UI with
# chunked long-form decoding. Whisper has no Konkani language token,
# which is why Marathi tokenizer settings are used as the closest
# available proxy throughout this file.
#
# from transformers import WhisperTokenizer, pipeline
# import gradio as gr
#
# tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small")
# pipe = pipeline(
#     model="thak123/gom-stt-v3",  # earlier checkpoints: thak123/whisper-small-LDC-V1, thak123/whisper-small-gom
#     task="automatic-speech-recognition",
#     tokenizer=tokenizer,
# )
#
# # Optionally pin the decoder prompt to Marathi transcription:
# # pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
# #     language="marathi", task="transcribe"
# # )
#
# def transcribe_speech(filepath):
#     output = pipe(
#         filepath,
#         max_new_tokens=256,
#         generate_kwargs={"task": "transcribe"},
#         chunk_length_s=30,  # split long recordings into 30 s windows
#         batch_size=8,
#     )
#     return output["text"]
#
# mic_transcribe = gr.Interface(
#     fn=transcribe_speech,
#     inputs=gr.Audio(sources="microphone", type="filepath"),
#     outputs=gr.Textbox(),
# )
# file_transcribe = gr.Interface(
#     fn=transcribe_speech,
#     inputs=gr.Audio(sources="upload", type="filepath"),
#     outputs=gr.Textbox(),
# )
#
# demo = gr.Blocks()
# with demo:
#     gr.TabbedInterface(
#         [mic_transcribe, file_transcribe],
#         ["Transcribe Microphone", "Transcribe Audio File"],
#     )
# demo.launch(debug=True)
# ------------------------------------------------------------------
from transformers import WhisperTokenizer, pipeline
import gradio as gr

# Whisper was not pretrained on Konkani, so the Marathi tokenizer settings
# serve as the closest available proxy for the fine-tuned Konkani model.
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="marathi", task="transcribe")
pipe = pipeline(model="thak123/gom-stt-v3", task="automatic-speech-recognition", tokenizer=tokenizer)
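
# If the Space has a GPU, the pipeline could be placed on it. A minimal
# sketch, disabled here (assumes torch is installed, as in the
# commented-out versions above):
# import torch
# pipe = pipeline(model="thak123/gom-stt-v3", task="automatic-speech-recognition",
#                 tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)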
def transcribe(audio):
    # The ASR pipeline returns a dict of the form {"text": "..."},
    # not a list, so index it by key.
    output = pipe(audio)
    text = output["text"]
    print("transcription:", text)
    return text
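
# Segment timestamps are also available if needed: with return_timestamps=True
# the pipeline returns a "chunks" list alongside "text". A minimal sketch
# (hypothetical helper, not wired into the UI below):
# def transcribe_with_timestamps(filepath):
#     output = pipe(filepath, return_timestamps=True)
#     return output["chunks"]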
iface = gr.Interface(
    fn=transcribe,
    # type="filepath" hands the pipeline a path it can decode directly,
    # matching the earlier versions above.
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    examples=[
        "audio/chalyaami.mp3",
        "audio/ekdonteen.flac",
        "audio/heyatachadjaale.mp3",
    ],
    title="Whisper Konkani",
    description="Real-time demo for Konkani speech recognition using a fine-tuned Whisper small model.",
)

iface.launch()
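
# Quick smoke test without the UI (a sketch: comment out iface.launch()
# above first; assumes the bundled example files exist in audio/):
# if __name__ == "__main__":
#     for clip in ("audio/chalyaami.mp3", "audio/ekdonteen.flac"):
#         print(clip, "->", transcribe(clip))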