Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import os
-
+
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
@@ -10,39 +10,25 @@ from langchain_community.llms import HuggingFacePipeline
 from langchain.chains import ConversationChain
 from langchain.memory import ConversationBufferMemory
 from langchain_community.llms import HuggingFaceEndpoint
-
-import vertexai
+
 from pathlib import Path
 import chromadb
 from unidecode import unidecode
+
 from transformers import AutoTokenizer
 import transformers
 import torch
 import tqdm
 import accelerate
 import re
-from langchain_openai import AzureChatOpenAI
 
-
-creds_json_str = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
-if creds_json_str is None:
-    raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
 
-# create a temporary file
-with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp:
-    temp.write(creds_json_str) # write in json format
-    temp_filename = temp.name
 
-    return temp_filename
-
-# os.environ["GOOGLE_APPLICATION_CREDENTIALS"]= get_credentials()
+from langchain_openai import AzureChatOpenAI
+
+
 os.environ["AZURE_OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_API_KEY")
-
-
-
-
-
+# default_persist_directory = './chroma_HF/'
 list_llm = ["Azure-OpenAI"]
+
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
 # Load PDF document and create doc splits
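Two fragile spots in this hunk are worth a reviewer note. The removed Google-credentials block could never have run as committed: `tempfile` is never imported, and the `return temp_filename` has no enclosing function (a SyntaxError at module level); the commented-out call `# os.environ["GOOGLE_APPLICATION_CREDENTIALS"]= get_credentials()` suggests it was meant to be a `get_credentials()` helper. Separately, the surviving line `os.environ["AZURE_OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_API_KEY")` raises a `TypeError` whenever the variable is unset, because environment values must be strings. A minimal sketch of both fixes (the helper name is inferred from the comment above, not confirmed elsewhere):

```python
import os
import tempfile

def get_credentials() -> str:
    """Write GOOGLE_APPLICATION_CREDENTIALS_JSON to a temp file and return its path."""
    creds_json_str = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
    if creds_json_str is None:
        raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
    # create a temporary file that outlives this function (delete=False)
    with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp:
        temp.write(creds_json_str)  # write the raw JSON string
        temp_filename = temp.name
    return temp_filename

# Fail loudly on a missing key instead of raising an opaque TypeError
azure_key = os.getenv("AZURE_OPENAI_API_KEY")
if azure_key is None:
    raise ValueError("AZURE_OPENAI_API_KEY not found in environment")
os.environ["AZURE_OPENAI_API_KEY"] = azure_key
```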
@@ -88,17 +74,40 @@ def load_db():
 # Initialize langchain LLM chain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     progress(0.1, desc="Initializing HF tokenizer...")
-
+    # HuggingFacePipeline uses local model
+    # Note: it will download model locally...
+    # tokenizer=AutoTokenizer.from_pretrained(llm_model)
+    # progress(0.5, desc="Initializing HF pipeline...")
+    # pipeline=transformers.pipeline(
+    #     "text-generation",
+    #     model=llm_model,
+    #     tokenizer=tokenizer,
+    #     torch_dtype=torch.bfloat16,
+    #     trust_remote_code=True,
+    #     device_map="auto",
+    #     # max_length=1024,
+    #     max_new_tokens=max_tokens,
+    #     do_sample=True,
+    #     top_k=top_k,
+    #     num_return_sequences=1,
+    #     eos_token_id=tokenizer.eos_token_id
+    # )
+    # llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': temperature})
 
     # HuggingFaceHub uses HF inference endpoints
     progress(0.5, desc="Initializing HF Hub...")
-
+    # Use of trust_remote_code as model_kwargs
+    # Warning: langchain issue
+    # URL: https://github.com/langchain-ai/langchain/issues/6080
     llm = AzureChatOpenAI(
-
-
-
-
-
+        azure_endpoint = "https://cloudcafe42.openai.azure.com/",
+        azure_deployment = "gpt-4",
+        openai_api_version = "2024-02-15-preview",
+        temperature = temperature,
+        max_new_tokens = max_tokens,
+        top_k = top_k
+    )
+
 
     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
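One caveat on the new `AzureChatOpenAI(...)` call: `max_new_tokens` and `top_k` are not constructor parameters of `langchain_openai.AzureChatOpenAI`. The OpenAI chat completions API names the output cap `max_tokens` and exposes no `top_k` sampling knob at all, so depending on the installed version these arguments are either rejected outright or shuttled into `model_kwargs` and then refused by the API. A hedged sketch restricted to parameters the class is known to accept, with endpoint, deployment, and API version copied from the diff (`build_llm` is a hypothetical wrapper standing in for the body of `initialize_llmchain`):

```python
from langchain_openai import AzureChatOpenAI

def build_llm(temperature: float, max_tokens: int) -> AzureChatOpenAI:
    # Endpoint, deployment, and API version are taken from the diff above.
    return AzureChatOpenAI(
        azure_endpoint="https://cloudcafe42.openai.azure.com/",
        azure_deployment="gpt-4",
        openai_api_version="2024-02-15-preview",
        temperature=temperature,
        max_tokens=max_tokens,  # OpenAI's name for the output-token cap
        # top_k is dropped deliberately: the OpenAI chat API has no such parameter
    )
```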
@@ -228,13 +237,13 @@ def demo():
         collection_name = gr.State()
 
         gr.Markdown(
-        """<center><h2>
+        """<center><h2>PDF-based chatbot</center></h2>
         <h3>Ask any questions about your PDF documents</h3>""")
         gr.Markdown(
-        """<b>Note:</b> This AI assistant, using Langchain and
+        """<b>Note:</b> This AI assistant, using Langchain and open-source LLMs, performs retrieval-augmented generation (RAG) from your PDF documents. \
         The user interface explicitely shows multiple steps to help understand the RAG workflow.
         This chatbot takes past questions into account when generating answers (via conversational memory), and includes document references for clarity purposes.<br>
-        <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps can take some time to generate a reply.
+        <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate a reply.
         """)
 
         with gr.Tab("Step 1 - Upload PDF"):
@@ -314,8 +323,9 @@ def demo():
             inputs=None, \
             outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
             queue=False)
-    demo.queue().launch(
+    demo.queue().launch(debug=True)
 
 
 if __name__ == "__main__":
     demo()
+
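The final hunk also completes what was previously an unterminated `demo.queue().launch(` call. For context, a minimal self-contained sketch of the queue-then-launch pattern (the block contents here are placeholders, not code from this Space):

```python
import gradio as gr

with gr.Blocks() as demo_block:
    gr.Markdown("<center><h2>PDF-based chatbot</h2></center>")

# queue() serializes concurrent requests, which matters on the free CPU Basic
# hardware mentioned in the note above; debug=True keeps the process attached
# so errors surface in the Space logs.
demo_block.queue().launch(debug=True)
```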