from transformers import pipeline
import base64
import gradio as gr

model_id = "openai/whisper-medium"  # update with your model id
# model_id = "openai/whisper-tiny"

# Build an ASR pipeline from the (fine-tuned) Whisper checkpoint.
pipe = pipeline("automatic-speech-recognition", model=model_id)


def transcribe_speech(filepath):
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "spanish",
        },  # update with the language you've fine-tuned on
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


# Base64-encode the logo so it can be embedded directly in the Markdown header below.
with open("Iso_Logotipo_Ceibal.png", "rb") as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode()

demo = gr.Blocks()

mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(source="microphone", type="filepath"),  # Gradio 3.x API; in 4.x use sources=["microphone"]
    outputs="textbox",
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(source="upload", type="filepath"),  # Gradio 3.x API; in 4.x use sources=["upload"]
    outputs="textbox",
)

with demo:
    # Spanish header shown in the demo. The <img> tag reconstructs the {} placeholder
    # that .format(encoded_image) fills with the base64-encoded PNG logo.
    gr.Markdown(
        """
<img src="data:image/png;base64,{}" width="200">

Uso de AI para transcribir audio a texto.

Con este espacio podrás transcribir audio a texto.

""".format(encoded_image)
    )
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribir desde el micrófono.", "Transcribir desde un Archivo de Audio."],
    )

demo.launch()