|
import urllib.request |
|
import fitz |
|
import re |
|
import numpy as np |
|
import tensorflow_hub as hub |
|
from openai import OpenAI |
|
import gradio as gr |
|
import os |
|
import shutil |
|
from pathlib import Path |
|
from tempfile import NamedTemporaryFile |
|
from sklearn.neighbors import NearestNeighbors |
|
import huggingface_hub |
|
|
|
|
|
|
|
|
|
# OpenAI-compatible client routed to the Hugging Face Inference API
# (Mixtral-8x7B-Instruct v0.1 served behind an OpenAI-style /v1/ endpoint).
# The API key is read from the "key" env var; if unset, api_key is None and
# requests will fail at call time rather than at import time.
client = OpenAI(
    base_url='https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1/v1/',
    api_key=os.getenv('key'),
)
clinet = client  # backward-compatible alias for the original misspelled name
|
|
|
from util import pdf_to_text, text_to_chunks, SemanticSearch |
|
|
|
# Module-level semantic-search index; (re)fitted on each uploaded PDF's
# chunks by load_recommender() below.
recommender = SemanticSearch()
|
def load_recommender(path, start_page=1):
    """Build the semantic index for the PDF at *path*.

    Extracts page text starting at *start_page*, splits it into chunks,
    and fits the module-level ``recommender`` on those chunks.
    Returns a short status string for display.
    """
    global recommender
    page_texts = pdf_to_text(path, start_page=start_page)
    recommender.fit(text_to_chunks(page_texts, start_page=start_page))
    return 'Corpus Loaded.'
|
|
|
|
|
def generate_text(prompt, model="mistralai/Mixtral-8x7B-Instruct-v0.1", max_tokens=1024):
    """Send *prompt* as a single user message and return the model's reply text.

    Bug fix: the original signature defaulted to "gpt-3.5-turbo-16k-0613" but
    then unconditionally overwrote ``model`` with the Mixtral id, making the
    parameter dead code.  The real default is now declared in the signature
    and the argument is honored; ``max_tokens`` is likewise exposed as a
    keyword with its previous hard-coded value.

    Args:
        prompt: Full prompt text for the chat completion.
        model: Model identifier passed to the inference endpoint.
        max_tokens: Completion length cap.

    Returns:
        The assistant message content (str, or None if the API returns none).
    """
    completion = clinet.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    return completion.choices[0].message.content
|
|
|
def generate_answer(question):
    """Answer *question* from the currently loaded PDF corpus.

    Retrieves the top-matching chunks from the global ``recommender``,
    assembles a grounded prompt with citation instructions, and asks the
    LLM for a concise reply.

    Bug fix: the instruction string continuation ``"...respond"\\ "in
    English"`` was missing a space and produced "respondin English"; the
    space is restored.
    """
    topn_chunks = recommender(question)

    prompt = 'search results:\n\n'
    for c in topn_chunks:
        prompt += c + '\n\n'

    prompt += "Instructions: Compose a comprehensive reply to the query using the search results given. "\
              "Cite each reference using [ Page Number] notation. "\
              "Only answer what is asked. The answer should be short and concise. "\
              "If asked in Chinese, respond in Chinese; if asked in English, respond "\
              "in English \n\nQuery: "

    prompt += f"{question}\nAnswer:"
    answer = generate_text(prompt)
    return answer
|
|
|
def question_anwser(chat_history, file, question):
    """Gradio click handler: index the uploaded PDF and answer *question*.

    Copies the uploaded file to a private temp path (the gradio-provided
    path may not be stable), fits the recommender on it, generates an
    answer, and appends the [question, answer] pair to *chat_history*.

    Bug fix: the ``NamedTemporaryFile(delete=False)`` copy was never
    removed, leaking one temp file per question; it is now deleted in a
    ``finally`` block.

    NOTE: the misspelled name is kept because ``btn.click`` references it.
    """
    suffix = Path(file.name).suffix
    with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        shutil.copyfile(file.name, tmp.name)
        tmp_path = Path(tmp.name)

    try:
        load_recommender(str(tmp_path))
        answer = generate_answer(question)
    finally:
        tmp_path.unlink(missing_ok=True)  # don't leak the temp copy

    chat_history.append([question, answer])
    return chat_history
|
|
|
# UI copy shown in the Gradio header below.
title = 'PDF GPT '

description = """ PDF GPT """
|
|
|
# Gradio UI: two-column layout — upload/question controls on the left,
# chat history on the right.  Construction order matters to gr.Blocks,
# so the code below is unchanged; only comments are added.
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200; }""") as demo:

    # Page header.
    gr.Markdown(f'<center><h3>{title}</h3></center>')

    gr.Markdown(description)

    with gr.Row():

        # Left column: PDF upload, question box, submit button.
        with gr.Group():

            with gr.Accordion("URL or pdf file"):

                file = gr.File(label='Upload your PDF/ Research Paper / Book here', file_types=['.pdf'])

            question = gr.Textbox(label='Enter your question here')

            btn = gr.Button(value='Submit')

        # Right column: conversation history.
        with gr.Group():

            chatbot = gr.Chatbot(label="Chat History", elem_id="chatbot")

    # Submit wiring: chatbot is both input (existing history) and output
    # (history with the new [question, answer] pair appended).
    btn.click(

        question_anwser,

        inputs=[chatbot, file, question],

        outputs=[chatbot],

        api_name="predict",

    )

# 0.0.0.0 binds all interfaces so the app is reachable from outside the
# host (container / remote deployment).
demo.launch(server_name="0.0.0.0")