import gradio as gr
from transformers import pipeline
from PIL import Image

# Inference pipelines are created once at import time so every UI callback
# reuses the same loaded model.

# Speech recognition, backed by the Spanish wav2vec2 XLSR-53 checkpoint.
trans = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-spanish",
)

# Text classification, backed by the robertuito sentiment-analysis checkpoint.
sentiment_cla = pipeline(
    task="text-classification",
    model="pysentimiento/robertuito-sentiment-analysis",
)

# Image classification, backed by the Swin-tiny (patch4, window7, 224) checkpoint.
image_cla = pipeline(
    task="image-classification",
    model="microsoft/swin-tiny-patch4-window7-224",
)

def audio2text(audio):
    """Transcribe an audio clip with the speech-recognition pipeline.

    Args:
        audio: The audio input forwarded to the ``trans`` pipeline. The UI
            passes a file path (the audio widget uses ``type="filepath"``),
            or ``None`` when nothing has been recorded yet.

    Returns:
        The transcribed text, or an empty string when ``audio`` is ``None``.
    """
    # The button can be clicked before anything is recorded; return an empty
    # transcription instead of letting the pipeline fail on a None input.
    if audio is None:
        return ""
    return trans(audio)["text"]

def text2sentiment(text):
    """Return the label of the first prediction for ``text``.

    The text is passed to the ``sentiment_cla`` text-classification pipeline
    and only the ``"label"`` field of its first result is returned.
    """
    predictions = sentiment_cla(text)
    first_prediction = predictions[0]
    return first_prediction["label"]

def classify_img(im):
    """Classify an image and return a label-to-score mapping.

    Args:
        im: Image as a NumPy array (anything supporting ``.astype``), or
            ``None`` when no image has been provided.

    Returns:
        A dict mapping each predicted label to its score, as produced by the
        ``image_cla`` pipeline, or ``None`` when ``im`` is ``None``.
    """
    # The button can be clicked before an image is loaded; bail out early
    # rather than failing on ``None.astype``.
    if im is None:
        return None
    # Let Pillow infer the mode from the array shape, then normalise to RGB so
    # grayscale (H, W) and RGBA (H, W, 4) arrays are accepted as well.
    pil_image = Image.fromarray(im.astype("uint8")).convert("RGB")
    predictions = image_cla(pil_image)
    return {pred["label"]: pred["score"] for pred in predictions}

# Build the three-tab UI. Each tab pairs an input widget with an output widget
# and a button that runs the matching inference helper.
with gr.Blocks() as demo:
    # NOTE: the image-classifier entry names the Swin-tiny checkpoint, matching
    # the model that ``image_cla`` actually loads.
    gr.Markdown("""
Este 'Space' permite la inferencia de los siguientes modelos de IA de tipo open-source:
- Voice2Text (en Español): [Wav2Vec2](https://huggingface.co/facebook/wav2vec2-large-xlsr-53-spanish)
- Sentiment Analysis (en Español): [Robertuito](https://huggingface.co/pysentimiento/robertuito-sentiment-analysis)
- Image Classifier: [Swin-tiny-patch4](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224)

Autor del demo: [ΜΕΤΑΝΘΡΩΠΙΑ](https://www.instagram.com/metantropia.jpg)
  """)

    with gr.Tabs():

        # Tab 1: microphone recording -> transcription.
        with gr.TabItem("Audio a Texto"):
            with gr.Row():
                audio = gr.Audio(sources="microphone", type="filepath")
                transcripcion = gr.Textbox()
            b1 = gr.Button("Transcribe")

        # Tab 2: free text -> sentiment label.
        with gr.TabItem("Analisis de Sentimiento"):
            with gr.Row():
                text = gr.Textbox()
                label = gr.Label()
            b2 = gr.Button("Sentimiento")

        # Tab 3: image -> top-5 class scores.
        with gr.TabItem("Clasificación de Imágenes"):
            with gr.Row():
                image = gr.Image(label="Carga una Imagen")
                label_image = gr.Label(num_top_classes=5)
            b3 = gr.Button("Clasifica")

    # Wire each button to its inference helper.
    b1.click(audio2text, inputs=audio, outputs=transcripcion)
    b2.click(text2sentiment, inputs=text, outputs=label)
    b3.click(classify_img, inputs=image, outputs=label_image)

demo.launch()