import os
import re
import threading
from uuid import uuid4

import chromadb
import google.generativeai as genai
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the Google API key from the environment and configure Gemini
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-pro')  # Load the Gemini-Pro model
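# Optional sanity check (a sketch, assuming GOOGLE_API_KEY is set in the environment):
#   print(model.generate_content("Say hello in one sentence.").text)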
def get_Answer(query):
    # Retrieve the two most relevant chunks from ChromaDB
    # (`collection` is created below, before the interfaces are launched)
    res = collection.query(
        query_texts=[query],
        n_results=2,
    )
    system = f"""You are a teacher. You will be provided some context,
    your task is to analyze the relevant context and answer the below question:
    - {query}
    """
    # Strip non-ASCII characters from the retrieved chunks and join them into one context string
    context = " ".join([re.sub(r'[^\x00-\x7F]+', ' ', r) for r in res['documents'][0]])
    prompt = f"### System: {system}\n\n### User: {context}\n\n### Assistant:\n"
    answer = model.generate_content(prompt).text
    return answer
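# Example call (a sketch, assuming a PDF has already been indexed via upload_pdf below):
#   print(get_Answer("What is the document about?"))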
# Split PDF pages into overlapping chunks before indexing
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=50,
)
# Persistent ChromaDB store; get_or_create_collection avoids an error
# if the collection already exists from a previous run
client = chromadb.PersistentClient("test")
collection = client.get_or_create_collection("test_data")
def upload_pdf(file_path):
    loader = PyPDFLoader(file_path)
    pages = loader.load()
    documents = []
    for page in pages:
        # Split each page into chunks and keep the page metadata with each chunk
        docs = text_splitter.split_text(page.page_content)
        for doc in docs:
            documents.append({
                "text": doc, "meta_data": page.metadata,
            })
    collection.add(
        ids=[str(uuid4()) for _ in range(len(documents))],
        documents=[doc['text'] for doc in documents],
        metadatas=[doc['meta_data'] for doc in documents],
    )
    return f"PDF Uploaded Successfully. {collection.count()} chunks stored in ChromaDB"
# Gradio interfaces
iface1 = gr.Interface(
    fn=get_Answer,
    inputs=gr.Textbox(lines=5, placeholder="Ask a question"),
    outputs="textbox",
    title="Answer Questions with Gemini-Pro",
    description="Ask a question and get an answer based on context from a ChromaDB collection.",
)
iface2 = gr.Interface(
    fn=upload_pdf,
    inputs=["file"],
    outputs="textbox",
    title="Upload PDF to ChromaDB",
    description="Upload a PDF file and store its text chunks in ChromaDB.",
)
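# Note: as an alternative to the two-thread launch below, both interfaces could
# be served from a single app, e.g.:
#   gr.TabbedInterface([iface2, iface1], ["Upload PDF", "Ask Questions"]).launch()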
# Launch each interface on its own port in a separate thread;
# launch() takes keyword arguments, so pass them via kwargs
thread1 = threading.Thread(
    target=iface1.launch,
    kwargs={"debug": True, "share": True, "server_port": 8080},
)
thread2 = threading.Thread(
    target=iface2.launch,
    kwargs={"debug": True, "share": True, "server_port": 8081},
)
thread1.start()
thread2.start()