|
import urllib.request |
|
import fitz |
|
import re |
|
from openai import OpenAI |
|
import gradio as gr |
|
import os |
|
import shutil |
|
from pathlib import Path |
|
import tensorflow_hub as hub |
|
from tempfile import NamedTemporaryFile |
|
|
|
# Shared chat-completions client. Requests go to the OpenRouter endpoint
# below, and the API key is read from the OPENROUTER_API_KEY environment
# variable (``os.getenv`` yields None when the variable is not set).
client = OpenAI(
    api_key=os.getenv('OPENROUTER_API_KEY'),
    base_url="https://openrouter.ai/api/v1",
)
|
|
|
from util import pdf_to_text, text_to_chunks, SemanticSearch |
|
|
|
# Module-wide search index; ``load_recommender`` fits it and
# ``generate_answer`` queries it.
recommender = SemanticSearch()


def load_recommender(path, start_page=1):
    """Fit the shared ``recommender`` on the document found at ``path``.

    Args:
        path: Location of the PDF; handed unchanged to ``pdf_to_text``.
        start_page: Page number forwarded to both ``pdf_to_text`` and
            ``text_to_chunks``.

    Returns:
        The status string ``'Corpus Loaded.'``.
    """
    # ``recommender`` is only used through its ``fit`` method and never
    # rebound here, so a ``global`` declaration is not required.
    page_texts = pdf_to_text(path, start_page=start_page)
    recommender.fit(text_to_chunks(page_texts, start_page=start_page))
    return 'Corpus Loaded.'
|
|
|
|
|
def generate_text(prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text used as the ``content`` of the one user message.

    Returns:
        The ``content`` of the message in the first choice of the
        completion returned by ``client``.
    """
    user_turn = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="google/gemini-pro",
        messages=[user_turn],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
|
|
|
def generate_answer(question):
    """Answer ``question`` using the chunks the recommender returns for it.

    The prompt is laid out as: a ``search results:`` header, every returned
    chunk followed by a blank line, the fixed instruction text, the query,
    and a trailing ``Answer:`` cue. It is then passed to ``generate_text``.

    Args:
        question: The user's query; also used as the recommender lookup.

    Returns:
        Whatever ``generate_text`` returns for the assembled prompt.
    """
    topn_chunks = recommender(question)

    # Adjacent string literals are concatenated with nothing in between, so
    # every fragment must carry its own trailing space. The fragment ending
    # in "respond " previously lacked it, producing "respondin English".
    instructions = (
        "Instructions: Compose a comprehensive reply to the query using the search results given. "
        "Cite each reference using [ Page Number] notation. "
        "Only answer what is asked. The answer should be short and concise. "
        "If asked in Chinese, respond in Chinese; if asked in English, respond "
        "in English \n\nQuery: "
    )

    search_results = ''.join(chunk + '\n\n' for chunk in topn_chunks)
    prompt = (
        'search results:\n\n'
        + search_results
        + instructions
        + f"{question}\nAnswer:"
    )
    return generate_text(prompt)
|
|
|
|
|
def question_answer(chat_history, file, question):
    """Answer ``question`` about the uploaded file and extend the chat history.

    The upload is copied to a private temporary file, the recommender is
    refitted on that copy, the answer is generated, and the copy is removed
    again before returning.

    Args:
        chat_history: List of ``[question, answer]`` pairs shown in the
            chatbot, or ``None`` when there is no history yet. A list that
            is passed in is extended in place.
        file: The uploaded document. Either an object exposing the path as
            ``.name`` or a plain path string is accepted, since the form
            delivered depends on how the ``gr.File`` component is
            configured. ``None`` means nothing was uploaded.
        question: The user's query.

    Returns:
        The chat history with one new ``[question, reply]`` pair appended.
        When no file was uploaded or the question is blank, the reply is a
        short hint instead of a model answer.
    """
    if chat_history is None:
        chat_history = []

    # Guard clauses: without them a missing upload raised AttributeError and
    # a blank question was still sent to the model.
    if file is None:
        chat_history.append([question, 'Please upload a PDF file first.'])
        return chat_history
    if not question or not question.strip():
        chat_history.append([question, 'Please enter a question.'])
        return chat_history

    source_path = Path(getattr(file, 'name', file))

    # Reserve a uniquely named path; it is closed immediately so the copy
    # below does not write into a file that is still open.
    with NamedTemporaryFile(delete=False, suffix=source_path.suffix) as tmp:
        tmp_path = Path(tmp.name)

    try:
        shutil.copyfile(str(source_path), str(tmp_path))
        load_recommender(str(tmp_path))
        answer = generate_answer(question)
    finally:
        # ``delete=False`` leaves removal to us; previously one copy leaked
        # per question. Cleanup is best-effort so that a failure to delete
        # never hides the answer or the original exception.
        try:
            tmp_path.unlink()
        except OSError:
            pass

    chat_history.append([question, answer])
    return chat_history
|
|
|
# Page heading and the blurb rendered directly beneath it.
title = 'PDF GPT '
description = """ PDF GPT """

# NOTE(review): ``min-height: 1200`` carries no unit, so browsers will most
# likely ignore that declaration - confirm the intended value (e.g. px).
# NOTE(review): the nesting below was reconstructed from the statements
# alone; confirm that only the file input belongs inside the accordion.
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200; }""") as demo:
    gr.Markdown(f'<center><h3>{title}</h3></center>')
    gr.Markdown(description)

    with gr.Row():
        # Input side: document upload, question box and submit button.
        with gr.Group():
            with gr.Accordion("URL or pdf file"):
                file = gr.File(
                    label='Upload your PDF/ Research Paper / Book here',
                    file_types=['.pdf'],
                )
            question = gr.Textbox(label='Enter your question here')
            btn = gr.Button(value='Submit')

        # Output side: the running conversation.
        with gr.Group():
            chatbot = gr.Chatbot(label="Chat History", elem_id="chatbot")

    # A click passes the current history, the upload and the question to
    # ``question_answer`` and shows the history it returns in the chatbot.
    btn.click(
        fn=question_answer,
        inputs=[chatbot, file, question],
        outputs=[chatbot],
        api_name="predict",
    )

# Bind to every network interface rather than only localhost.
demo.launch(server_name="0.0.0.0")