import os
import re
import threading
from uuid import uuid4

import chromadb
import google.generativeai as genai
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the Google API key from the environment and configure Gemini
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-pro')  # Load the Gemini-Pro model
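# Optional sanity check (a sketch, assuming GOOGLE_API_KEY is set in the environment):
#   print(model.generate_content("Say hello in one sentence.").text)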
def get_Answer(query):
    # Retrieve the two most relevant chunks from ChromaDB
    # (`collection` is created below, before the interfaces are launched)
    res = collection.query(
        query_texts=[query],
        n_results=2,
    )
    system = f"""You are a teacher. You will be provided some context,
    your task is to analyze the relevant context and answer the below question:
    - {query}
    """
    # Strip non-ASCII characters from the retrieved chunks and join them into one context string
    context = " ".join([re.sub(r'[^\x00-\x7F]+', ' ', r) for r in res['documents'][0]])
    prompt = f"### System: {system}\n\n### User: {context}\n\n### Assistant:\n"
    answer = model.generate_content(prompt).text
    return answer
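# Example call (a sketch, assuming a PDF has already been indexed via upload_pdf below):
#   print(get_Answer("What is the document about?"))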
# Split PDF pages into overlapping chunks before indexing
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=50,
)
# Persistent ChromaDB store; get_or_create_collection avoids an error
# if the collection already exists from a previous run
client = chromadb.PersistentClient("test")
collection = client.get_or_create_collection("test_data")
def upload_pdf(file_path):
    loader = PyPDFLoader(file_path)
    pages = loader.load()
    documents = []
    for page in pages:
        # Split each page into chunks and keep the page metadata with each chunk
        docs = text_splitter.split_text(page.page_content)
        for doc in docs:
            documents.append({
                "text": doc, "meta_data": page.metadata,
            })
    collection.add(
        ids=[str(uuid4()) for _ in range(len(documents))],
        documents=[doc['text'] for doc in documents],
        metadatas=[doc['meta_data'] for doc in documents],
    )
    return f"PDF Uploaded Successfully. {collection.count()} chunks stored in ChromaDB"
# Gradio interfaces
iface1 = gr.Interface(
    fn=get_Answer,
    inputs=gr.Textbox(lines=5, placeholder="Ask a question"),
    outputs="textbox",
    title="Answer Questions with Gemini-Pro",
    description="Ask a question and get an answer based on context from a ChromaDB collection.",
)
iface2 = gr.Interface(
    fn=upload_pdf,
    inputs=["file"],
    outputs="textbox",
    title="Upload PDF to ChromaDB",
    description="Upload a PDF file and store its text chunks in ChromaDB.",
)
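# Note: as an alternative to the two-thread launch below, both interfaces could
# be served from a single app, e.g.:
#   gr.TabbedInterface([iface2, iface1], ["Upload PDF", "Ask Questions"]).launch()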
# Launch each interface on its own port in a separate thread;
# launch() takes keyword arguments, so pass them via kwargs
thread1 = threading.Thread(
    target=iface1.launch,
    kwargs={"debug": True, "share": True, "server_port": 8080},
)
thread2 = threading.Thread(
    target=iface2.launch,
    kwargs={"debug": True, "share": True, "server_port": 8081},
)
thread1.start()
thread2.start()