import gradio as gr
from llama_index.core import Document, Settings, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

# Configure the local LLM and embedding model that LlamaIndex will use
# for answer generation and retrieval, respectively.
Settings.llm = HuggingFaceLLM(
    model_name="facebook/blenderbot-400M-distill",
    tokenizer_name="facebook/blenderbot-400M-distill",
    device_map="cpu",
    context_window=128,
)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Build an in-memory vector index over a single document; the query
# engine retrieves from it and passes the context to the LLM.
documents = [
    Document(
        text="Indian parliament elections happened in April-May 2024. BJP Party won."
    )
]
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()


def rag(input_text, file):
    # The file input is currently unused. query() returns a Response
    # object, so convert it to a string for the Gradio Textbox output.
    return str(query_engine.query(input_text))


iface = gr.Interface(
    fn=rag,
    inputs=[gr.Textbox(label="Question", lines=6), gr.File()],
    outputs=[gr.Textbox(label="Result", lines=6)],
    title="Answer my question",
    description="CoolChatBot",
)
iface.launch()