|
import urllib.request |
|
import fitz |
|
import re |
|
import numpy as np |
|
import tensorflow_hub as hub |
|
from openai import OpenAI |
|
import gradio as gr |
|
import os |
|
import shutil |
|
from pathlib import Path |
|
from tempfile import NamedTemporaryFile |
|
from sklearn.neighbors import NearestNeighbors |
|
import huggingface_hub |
|
|
|
|
|
|
|
|
|
# OpenAI-compatible client routed to the Hugging Face Inference API
# (Mixtral-8x7B-Instruct v0.1 served behind an OpenAI-style /v1/ endpoint).
# The API key is read from the "key" env var; if unset, api_key is None and
# requests will fail at call time rather than at import time.
client = OpenAI(
    base_url='https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1/v1/',
    api_key=os.getenv('key'),
)
clinet = client  # backward-compatible alias for the original misspelled name
|
|
|
from util import pdf_to_text, text_to_chunks, SemanticSearch |
|
|
|
# Module-level semantic-search index; (re)fitted on each uploaded PDF's
# chunks by load_recommender() below.
recommender = SemanticSearch()
|
def load_recommender(path, start_page=1):
    """Build the semantic index for the PDF at *path*.

    Extracts page text starting at *start_page*, splits it into chunks,
    and fits the module-level ``recommender`` on those chunks.
    Returns a short status string for display.
    """
    global recommender
    page_texts = pdf_to_text(path, start_page=start_page)
    recommender.fit(text_to_chunks(page_texts, start_page=start_page))
    return 'Corpus Loaded.'
|
|
|
|
|
def generate_text(prompt, model="mistralai/Mixtral-8x7B-Instruct-v0.1", max_tokens=1024):
    """Send *prompt* as a single user message and return the model's reply text.

    Bug fix: the original signature defaulted to "gpt-3.5-turbo-16k-0613" but
    then unconditionally overwrote ``model`` with the Mixtral id, making the
    parameter dead code.  The real default is now declared in the signature
    and the argument is honored; ``max_tokens`` is likewise exposed as a
    keyword with its previous hard-coded value.

    Args:
        prompt: Full prompt text for the chat completion.
        model: Model identifier passed to the inference endpoint.
        max_tokens: Completion length cap.

    Returns:
        The assistant message content (str, or None if the API returns none).
    """
    completion = clinet.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    return completion.choices[0].message.content
|
|
|
def generate_answer(question):
    """Answer *question* from the currently loaded PDF corpus.

    Retrieves the top-matching chunks from the global ``recommender``,
    assembles a grounded prompt with citation instructions, and asks the
    LLM for a concise reply.

    Bug fix: the instruction string continuation ``"...respond"\\ "in
    English"`` was missing a space and produced "respondin English"; the
    space is restored.
    """
    topn_chunks = recommender(question)

    prompt = 'search results:\n\n'
    for c in topn_chunks:
        prompt += c + '\n\n'

    prompt += "Instructions: Compose a comprehensive reply to the query using the search results given. "\
              "Cite each reference using [ Page Number] notation. "\
              "Only answer what is asked. The answer should be short and concise. "\
              "If asked in Chinese, respond in Chinese; if asked in English, respond "\
              "in English \n\nQuery: "

    prompt += f"{question}\nAnswer:"
    answer = generate_text(prompt)
    return answer
|
|
|
def question_anwser(chat_history, file, question):
    """Gradio click handler: index the uploaded PDF and answer *question*.

    Copies the uploaded file to a private temp path (the gradio-provided
    path may not be stable), fits the recommender on it, generates an
    answer, and appends the [question, answer] pair to *chat_history*.

    Bug fix: the ``NamedTemporaryFile(delete=False)`` copy was never
    removed, leaking one temp file per question; it is now deleted in a
    ``finally`` block.

    NOTE: the misspelled name is kept because ``btn.click`` references it.
    """
    suffix = Path(file.name).suffix
    with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        shutil.copyfile(file.name, tmp.name)
        tmp_path = Path(tmp.name)

    try:
        load_recommender(str(tmp_path))
        answer = generate_answer(question)
    finally:
        tmp_path.unlink(missing_ok=True)  # don't leak the temp copy

    chat_history.append([question, answer])
    return chat_history
|
|
|
# UI copy shown in the Gradio header below.
title = 'PDF GPT '

description = """ PDF GPT """
|
|
|
# Gradio UI: two-column layout — upload/question controls on the left,
# chat history on the right.  Construction order matters to gr.Blocks,
# so the code below is unchanged; only comments are added.
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200; }""") as demo:

    # Page header.
    gr.Markdown(f'<center><h3>{title}</h3></center>')

    gr.Markdown(description)

    with gr.Row():

        # Left column: PDF upload, question box, submit button.
        with gr.Group():

            with gr.Accordion("URL or pdf file"):

                file = gr.File(label='Upload your PDF/ Research Paper / Book here', file_types=['.pdf'])

            question = gr.Textbox(label='Enter your question here')

            btn = gr.Button(value='Submit')

        # Right column: conversation history.
        with gr.Group():

            chatbot = gr.Chatbot(label="Chat History", elem_id="chatbot")

    # Submit wiring: chatbot is both input (existing history) and output
    # (history with the new [question, answer] pair appended).
    btn.click(

        question_anwser,

        inputs=[chatbot, file, question],

        outputs=[chatbot],

        api_name="predict",

    )

# 0.0.0.0 binds all interfaces so the app is reachable from outside the
# host (container / remote deployment).
demo.launch(server_name="0.0.0.0")