import gradio as gr
from PIL import Image
from transformers import pipeline

# Load the three Hugging Face pipelines: Spanish speech recognition,
# Spanish/English sentiment analysis, and general image classification.
transcribe = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-spanish",
)
classifier = pipeline(
    "text-classification",
    model="pysentimiento/robertuito-sentiment-analysis",
)
image_classifier = pipeline(
    "image-classification",
    model="microsoft/swin-tiny-patch4-window7-224",
)


def audio_to_text(audio):
    # The ASR pipeline accepts a file path and returns a dict with a "text" key.
    text = transcribe(audio)["text"]
    return text


def text_to_sentiment(text):
    # Return only the label of the top prediction.
    return classifier(text)[0]["label"]


def classify_image(image):
    # gr.Image passes a NumPy array by default; convert it to a PIL image.
    image = Image.fromarray(image.astype("uint8"), "RGB")
    answers = image_classifier(image)
    # gr.Label expects a {label: confidence} dictionary.
    labels = {answer["label"]: answer["score"] for answer in answers}
    return labels


demo = gr.Blocks()

with demo:
    gr.Markdown("Example with Gradio Blocks")
    with gr.Tabs():
        with gr.TabItem("Transcribe audio in Spanish"):
            with gr.Row():
                audio = gr.Audio(sources=["microphone"], type="filepath")
                transcription = gr.Textbox()
            transcribe_button = gr.Button("Transcribe")
        with gr.TabItem("Sentiment analysis in English and Spanish"):
            with gr.Row():
                text = gr.Textbox()
                label = gr.Label()
            sentiment_button = gr.Button("Calculate sentiment")
        with gr.TabItem("Image classification"):
            with gr.Row():
                image = gr.Image(label="Upload an image here")
                label_image = gr.Label(num_top_classes=3)
            classify_button = gr.Button("Classify image")

    # Wire each button to its function and output component.
    transcribe_button.click(audio_to_text, inputs=audio, outputs=transcription)
    sentiment_button.click(text_to_sentiment, inputs=text, outputs=label)
    classify_button.click(classify_image, inputs=image, outputs=label_image)

demo.launch()