from transformers import WhisperTokenizer, pipeline
import gradio as gr

# Whisper has no Konkani language token; the Marathi tokenizer is the usual
# stand-in, since Konkani is also written in Devanagari.
tokenizer = WhisperTokenizer.from_pretrained(
    "openai/whisper-small", language="marathi", task="transcribe"
)

pipe = pipeline(
    model="thak123/gom-stt-v3",  # Whisper-small fine-tuned for Konkani
    task="automatic-speech-recognition",
    tokenizer=tokenizer,
)


def transcribe(audio):
    # For a single input the ASR pipeline returns a dict, e.g. {"text": "..."},
    # not a list, so index by key rather than by position.
    result = pipe(audio)
    text = result["text"]
    print("op", text)
    return text


iface = gr.Interface(
    fn=transcribe,
    # type="filepath" hands the pipeline a path it can decode itself;
    # Gradio's default "numpy" tuple is not a valid pipeline input.
    inputs=[gr.Audio(sources=["microphone", "upload"], type="filepath")],
    outputs="text",
    examples=[
        ["audio/chalyaami.mp3"],
        ["audio/ekdonteen.flac"],
        ["audio/heyatachadjaale.mp3"],
    ],
    title="Whisper Konkani",
    description="Real-time demo for Konkani speech recognition using a fine-tuned Whisper small model.",
)

iface.launch()
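
# For recordings longer than Whisper's 30-second window, the pipeline can
# chunk the audio and batch the chunks. A minimal sketch of that variant,
# kept out of the UI; it assumes the standard Whisper generate kwargs, and
# "marathi" stands in for Konkani, which Whisper does not list as a language:
#
# def transcribe_long(filepath):
#     output = pipe(
#         filepath,
#         max_new_tokens=256,  # cap decoded tokens per chunk
#         chunk_length_s=30,   # split audio into 30 s windows
#         batch_size=8,        # decode several windows per forward pass
#         generate_kwargs={"task": "transcribe", "language": "marathi"},
#     )
#     return output["text"]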