Spaces:

Roberta2024
/

Nvidia_RAG_pdf

Sleeping

File size: 3,346 Bytes

ae38eb4
 
 
 
959d70e
 
 
ae38eb4
 
 
959d70e
 
 
ae38eb4
 
959d70e
ae38eb4
 
959d70e
ae38eb4
4def369
 
 
 
 
 
 
 
 
 
ae38eb4
4def369
 
 
 
 
 
 
 
 
 
 
 
b6c3f96
4def369
 
 
 
 
 
 
 
ae38eb4
 
 
 
959d70e
ae38eb4
4def369
 
 
 
ae38eb4
 
 
 
 
 
959d70e
ae38eb4

import os
import gradio as gr
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.chains.question_answering import load_qa_chain
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hand the Gemini API key (read from the GOOGLE_API_KEY environment variable)
# to the google.generativeai client.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Load the Mistral tokenizer and model once at import time so that every
# request served by the app reuses the same instances.
model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
dtype = torch.bfloat16
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
mistral_tokenizer = AutoTokenizer.from_pretrained(model_path)
mistral_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=dtype,
    device_map=device,
)

def _generate_follow_up(answer, max_new_tokens=150):
    """Generate a follow-up question for *answer* with the local Mistral model.

    Args:
        answer: Text of the answer the follow-up question should build on.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        The decoded continuation only, stripped of surrounding whitespace;
        the prompt itself is not echoed back.
    """
    mistral_prompt = f"Based on this answer: {answer}\nGenerate a follow-up question:"
    # Calling the tokenizer (instead of .encode) also returns the attention
    # mask, so generate() does not have to infer it.
    mistral_inputs = mistral_tokenizer(mistral_prompt, return_tensors='pt').to(device)
    prompt_length = mistral_inputs["input_ids"].shape[-1]
    with torch.no_grad():
        # max_new_tokens budgets the continuation only; max_length would also
        # count the prompt, leaving no room once the answer is long.
        mistral_outputs = mistral_model.generate(**mistral_inputs, max_new_tokens=max_new_tokens)
    # The returned sequence starts with the prompt tokens; keep only what
    # the model added after them.
    continuation = mistral_outputs[0][prompt_length:]
    return mistral_tokenizer.decode(continuation, skip_special_tokens=True).strip()


def initialize(file_path, question, max_pages=30):
    """Answer *question* about a PDF with Gemini and add a Mistral follow-up.

    Args:
        file_path: Path of the PDF file on disk.
        question: Question to answer from the document.
        max_pages: Maximum number of leading document chunks (as returned by
            the loader) handed to Gemini as context. ``None`` disables the cap.

    Returns:
        A string combining the Gemini answer and the Mistral follow-up, or a
        human-readable error message when the file does not exist or any
        step raises.
    """
    try:
        # Check the file first so a missing upload is reported as such,
        # before any API client is constructed.
        if not os.path.exists(file_path):
            return "Error: Unable to process the document. Please ensure the PDF file is valid."

        model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
        prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                              not contained in the context, say "answer not available in context" \n\n
                              Context: \n {context}?\n
                              Question: \n {question} \n
                              Answer:
                            """
        prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

        pdf_loader = PyPDFLoader(file_path)
        pages = pdf_loader.load_and_split()
        stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
        # The "stuff" chain fills {context} from input_documents itself, so
        # the cap has to be applied to the documents that are passed in.
        stuff_answer = stuff_chain(
            {"input_documents": pages[:max_pages], "question": question},
            return_only_outputs=True,
        )
        gemini_answer = stuff_answer['output_text']

        # Use Mistral model for additional text generation
        mistral_output = _generate_follow_up(gemini_answer)

        return f"Gemini Answer: {gemini_answer}\n\nMistral Follow-up: {mistral_output}"
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"An error occurred: {str(e)}"

# Gradio widgets: a PDF upload, a question box, and a text box for the
# combined answer.
input_file = gr.File(
    label="Upload PDF File",
)
input_question = gr.Textbox(
    label="Ask about the document",
)
output_text = gr.Textbox(
    label="Answer - Combined Gemini and Mistral",
)

def pdf_qa(file, question):
    """Validate the Gradio inputs and run the PDF question-answering pipeline.

    Args:
        file: The uploaded file as delivered by the Gradio File component:
            either an object exposing its path as ``.name`` or a plain path
            string; ``None`` when nothing was uploaded.
        question: The question typed by the user.

    Returns:
        The text produced by ``initialize``, or a short message asking for
        the missing input.
    """
    if file is None:
        return "Please upload a PDF file first."
    # Reject blank questions up front rather than spending a model call on them.
    if not question or not question.strip():
        return "Please enter a question about the document."
    # Accept both file-like objects (path in .name) and plain path strings.
    # NOTE(review): which one arrives depends on the Gradio version and the
    # File component's `type` setting; confirm against the deployed version.
    file_path = getattr(file, "name", file)
    return initialize(file_path, question)

# Wire the callback and widgets into a Gradio Interface and start serving it.
demo = gr.Interface(
    fn=pdf_qa,
    inputs=[input_file, input_question],
    outputs=output_text,
    title="RAG Knowledge Retrieval using Gemini API and Mistral Model",
    description="Upload a PDF file and ask questions about the content.",
)
demo.launch()