AI_Beta

Sleeping

App Files Files

JohnSmith9982 committed on May 22, 2023

Commit

11eb8f3

1 Parent(s): 7e611fd

Upload 57 files

Browse files

Files changed (18) hide show

ChuanhuChatbot.py +7 -3
modules/__pycache__/config.cpython-39.pyc +0 -0
modules/__pycache__/index_func.cpython-39.pyc +0 -0
modules/__pycache__/overwrites.cpython-39.pyc +0 -0
modules/__pycache__/presets.cpython-39.pyc +0 -0
modules/config.py +11 -1
modules/index_func.py +141 -0
modules/models/ChuanhuAgent.py +181 -0
modules/models/PaLM.py +11 -0
modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc +0 -0
modules/models/__pycache__/base_model.cpython-39.pyc +0 -0
modules/models/__pycache__/models.cpython-39.pyc +0 -0
modules/models/base_model.py +88 -42
modules/models/models.py +9 -3
modules/overwrites.py +1 -11
modules/pdf_func.py +7 -7
modules/presets.py +9 -2
requirements.txt +11 -2

ChuanhuChatbot.py CHANGED Viewed

@@ -15,7 +15,6 @@ from modules.models.models import get_model
 gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
 gr.Chatbot.postprocess = postprocess
-PromptHelper.compact_text_chunks = compact_text_chunks
 with open("assets/custom.css", "r", encoding="utf-8") as f:
     customCSS = f.read()
@@ -244,7 +243,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                             lines=1,
                         )
-                    with gr.Accordion(i18n("网络设置"), open=False, visible=False):
                         # 优先展示自定义的api_host
                         apihostTxt = gr.Textbox(
                             show_label=True,
@@ -333,7 +332,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
     submitBtn.click(**transfer_input_args).then(**chatgpt_predict_args, api_name="predict").then(**end_outputing_args)
     submitBtn.click(**get_usage_args)
-    index_files.change(handle_file_upload, [current_model, index_files, chatbot], [index_files, chatbot, status_display])
     emptyBtn.click(
         reset,
@@ -467,7 +466,12 @@ demo.title = i18n("川虎Chat 🚀")
 if __name__ == "__main__":
     reload_javascript()
     demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
         favicon_path="./assets/favicon.ico",
     )
     # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860, share=False) # 可自定义端口
     # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860,auth=("在这里填写用户名", "在这里填写密码")) # 可设置用户名与密码

 gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
 gr.Chatbot.postprocess = postprocess
 with open("assets/custom.css", "r", encoding="utf-8") as f:
     customCSS = f.read()
                             lines=1,
                         )
+                    with gr.Accordion(i18n("网络设置"), open=False):
                         # 优先展示自定义的api_host
                         apihostTxt = gr.Textbox(
                             show_label=True,
     submitBtn.click(**transfer_input_args).then(**chatgpt_predict_args, api_name="predict").then(**end_outputing_args)
     submitBtn.click(**get_usage_args)
+    index_files.change(handle_file_upload, [current_model, index_files, chatbot, language_select_dropdown], [index_files, chatbot, status_display])
     emptyBtn.click(
         reset,
 if __name__ == "__main__":
     reload_javascript()
     demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
+        server_name=server_name,
+        server_port=server_port,
+        share=share,
+        auth=auth_list if authflag else None,
         favicon_path="./assets/favicon.ico",
+        inbrowser=not dockerflag, # 禁止在docker下开启inbrowser
     )
     # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860, share=False) # 可自定义端口
     # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860,auth=("在这里填写用户名", "在这里填写密码")) # 可设置用户名与密码

modules/__pycache__/config.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/config.cpython-39.pyc and b/modules/__pycache__/config.cpython-39.pyc differ

modules/__pycache__/index_func.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/index_func.cpython-39.pyc and b/modules/__pycache__/index_func.cpython-39.pyc differ

modules/__pycache__/overwrites.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/overwrites.cpython-39.pyc and b/modules/__pycache__/overwrites.cpython-39.pyc differ

modules/__pycache__/presets.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/presets.cpython-39.pyc and b/modules/__pycache__/presets.cpython-39.pyc differ

modules/config.py CHANGED Viewed

@@ -24,7 +24,8 @@ __all__ = [
     "server_name",
     "server_port",
     "share",
-    "hide_history_when_not_logged_in"
 ]
 # 添加一个统一的config文件，避免文件过多造成的疑惑（优先级最低）
@@ -76,6 +77,9 @@ my_api_key = os.environ.get("OPENAI_API_KEY", my_api_key)
 xmchat_api_key = config.get("xmchat_api_key", "")
 os.environ["XMCHAT_API_KEY"] = xmchat_api_key
 render_latex = config.get("render_latex", True)
 if render_latex:
@@ -102,6 +106,12 @@ api_host = os.environ.get("api_host", config.get("api_host", ""))
 if api_host:
     shared.state.set_api_host(api_host)
 @contextmanager
 def retrieve_openai_api(api_key = None):
     old_api_key = os.environ.get("OPENAI_API_KEY", "")

     "server_name",
     "server_port",
     "share",
+    "hide_history_when_not_logged_in",
+    "default_chuanhu_assistant_model"
 ]
 # 添加一个统一的config文件，避免文件过多造成的疑惑（优先级最低）
 xmchat_api_key = config.get("xmchat_api_key", "")
 os.environ["XMCHAT_API_KEY"] = xmchat_api_key
+google_palm_api_key = config.get("google_palm_api_key", "")
+os.environ["GOOGLE_PALM_API_KEY"] = google_palm_api_key
 render_latex = config.get("render_latex", True)
 if render_latex:
 if api_host:
     shared.state.set_api_host(api_host)
+default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
+os.environ["GOOGLE_CSE_ID"] = config.get("GOOGLE_CSE_ID", "")
+os.environ["GOOGLE_API_KEY"] = config.get("GOOGLE_API_KEY", "")
+os.environ["WOLFRAM_ALPHA_APPID"] = config.get("WOLFRAM_ALPHA_APPID", "")
+os.environ["SERPAPI_API_KEY"] = config.get("SERPAPI_API_KEY", "")
 @contextmanager
 def retrieve_openai_api(api_key = None):
     old_api_key = os.environ.get("OPENAI_API_KEY", "")

modules/index_func.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import os
+import logging
+import colorama
+import PyPDF2
+from tqdm import tqdm
+from modules.presets import *
+from modules.utils import *
+from modules.config import local_embedding
+def get_index_name(file_src):
+    file_paths = [x.name for x in file_src]
+    file_paths.sort(key=lambda x: os.path.basename(x))
+    md5_hash = hashlib.md5()
+    for file_path in file_paths:
+        with open(file_path, "rb") as f:
+            while chunk := f.read(8192):
+                md5_hash.update(chunk)
+    return md5_hash.hexdigest()
+def get_documents(file_src):
+    from langchain.schema import Document
+    from langchain.text_splitter import TokenTextSplitter
+    text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=30)
+    documents = []
+    logging.debug("Loading documents...")
+    logging.debug(f"file_src: {file_src}")
+    for file in file_src:
+        filepath = file.name
+        filename = os.path.basename(filepath)
+        file_type = os.path.splitext(filename)[1]
+        logging.info(f"loading file: {filename}")
+        try:
+            if file_type == ".pdf":
+                logging.debug("Loading PDF...")
+                try:
+                    from modules.pdf_func import parse_pdf
+                    from modules.config import advance_docs
+                    two_column = advance_docs["pdf"].get("two_column", False)
+                    pdftext = parse_pdf(filepath, two_column).text
+                except:
+                    pdftext = ""
+                    with open(filepath, "rb") as pdfFileObj:
+                        pdfReader = PyPDF2.PdfReader(pdfFileObj)
+                        for page in tqdm(pdfReader.pages):
+                            pdftext += page.extract_text()
+                texts = Document(page_content=pdftext, metadata={"source": filepath})
+            elif file_type == ".docx":
+                logging.debug("Loading Word...")
+                from langchain.document_loaders import UnstructuredWordDocumentLoader
+                loader = UnstructuredWordDocumentLoader(filepath)
+                texts = loader.load()
+            elif file_type == ".pptx":
+                logging.debug("Loading PowerPoint...")
+                from langchain.document_loaders import UnstructuredPowerPointLoader
+                loader = UnstructuredPowerPointLoader(filepath)
+                texts = loader.load()
+            elif file_type == ".epub":
+                logging.debug("Loading EPUB...")
+                from langchain.document_loaders import UnstructuredEPubLoader
+                loader = UnstructuredEPubLoader(filepath)
+                texts = loader.load()
+            elif file_type == ".xlsx":
+                logging.debug("Loading Excel...")
+                text_list = excel_to_string(filepath)
+                for elem in text_list:
+                    documents.append(Document(page_content=elem, metadata={"source": filepath}))
+                continue
+            else:
+                logging.debug("Loading text file...")
+                from langchain.document_loaders import TextLoader
+                loader = TextLoader(filepath, "utf8")
+                texts = loader.load()
+        except Exception as e:
+            import traceback
+            logging.error(f"Error loading file: {filename}")
+            traceback.print_exc()
+        texts = text_splitter.split_documents(texts)
+        documents.extend(texts)
+    logging.debug("Documents loaded.")
+    return documents
+def construct_index(
+    api_key,
+    file_src,
+    max_input_size=4096,
+    num_outputs=5,
+    max_chunk_overlap=20,
+    chunk_size_limit=600,
+    embedding_limit=None,
+    separator=" ",
+):
+    from langchain.chat_models import ChatOpenAI
+    from langchain.vectorstores import FAISS
+    if api_key:
+        os.environ["OPENAI_API_KEY"] = api_key
+    else:
+        # 由于一个依赖的愚蠢的设计，这里必须要有一个API KEY
+        os.environ["OPENAI_API_KEY"] = "sk-xxxxxxx"
+    chunk_size_limit = None if chunk_size_limit == 0 else chunk_size_limit
+    embedding_limit = None if embedding_limit == 0 else embedding_limit
+    separator = " " if separator == "" else separator
+    index_name = get_index_name(file_src)
+    index_path = f"./index/{index_name}"
+    if local_embedding:
+        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+        embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/distiluse-base-multilingual-cased-v2")
+    else:
+        from langchain.embeddings import OpenAIEmbeddings
+        embeddings = OpenAIEmbeddings()
+    if os.path.exists(index_path):
+        logging.info("找到了缓存的索引文件，加载中……")
+        return FAISS.load_local(index_path, embeddings)
+    else:
+        try:
+            documents = get_documents(file_src)
+            logging.info("构建索引中……")
+            with retrieve_proxy():
+                index = FAISS.from_documents(documents, embeddings)
+            logging.debug("索引构建完成！")
+            os.makedirs("./index", exist_ok=True)
+            index.save_local(index_path)
+            logging.debug("索引已保存至本地!")
+            return index
+        except Exception as e:
+            import traceback
+            logging.error("索引构建失败！", e)
+            traceback.print_exc()
+            return None

modules/models/ChuanhuAgent.py ADDED Viewed

	@@ -0,0 +1,181 @@

+from langchain.chains.summarize import load_summarize_chain
+from langchain import PromptTemplate, LLMChain
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import PromptTemplate
+from langchain.text_splitter import TokenTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains import RetrievalQA
+from langchain.agents import load_tools
+from langchain.agents import initialize_agent
+from langchain.agents import AgentType
+from langchain.docstore.document import Document
+from langchain.tools import BaseTool, StructuredTool, Tool, tool
+from langchain.callbacks.stdout import StdOutCallbackHandler
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.callbacks.manager import BaseCallbackManager
+from typing import Any, Dict, List, Optional, Union
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.input import print_text
+from langchain.schema import AgentAction, AgentFinish, LLMResult
+from pydantic import BaseModel, Field
+import requests
+from bs4 import BeautifulSoup
+from threading import Thread, Condition
+from collections import deque
+from .base_model import BaseLLMModel, CallbackToIterator, ChuanhuCallbackHandler
+from ..config import default_chuanhu_assistant_model
+from ..presets import SUMMARIZE_PROMPT, i18n
+from ..index_func import construct_index
+from langchain.callbacks import get_openai_callback
+import os
+import gradio as gr
+import logging
+class WebBrowsingInput(BaseModel):
+    url: str = Field(description="URL of a webpage")
+class WebAskingInput(BaseModel):
+    url: str = Field(description="URL of a webpage")
+    question: str = Field(description="Question that you want to know the answer to, based on the webpage's content.")
+class ChuanhuAgent_Client(BaseLLMModel):
+    def __init__(self, model_name, openai_api_key, user_name="") -> None:
+        super().__init__(model_name=model_name, user=user_name)
+        self.text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=30)
+        self.api_key = openai_api_key
+        self.llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0, model_name=default_chuanhu_assistant_model)
+        self.cheap_llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0, model_name="gpt-3.5-turbo")
+        PROMPT = PromptTemplate(template=SUMMARIZE_PROMPT, input_variables=["text"])
+        self.summarize_chain = load_summarize_chain(self.cheap_llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
+        self.index_summary = None
+        self.index = None
+        if "Pro" in self.model_name:
+            self.tools = load_tools(["google-search-results-json", "llm-math", "arxiv", "wikipedia", "wolfram-alpha"], llm=self.llm)
+        else:
+            self.tools = load_tools(["ddg-search", "llm-math", "arxiv", "wikipedia"], llm=self.llm)
+        self.tools.append(
+            Tool.from_function(
+                func=self.summary_url,
+                name="Summary Webpage",
+                description="useful when you need to know the overall content of a webpage.",
+                args_schema=WebBrowsingInput
+            )
+        )
+        self.tools.append(
+            StructuredTool.from_function(
+                func=self.ask_url,
+                name="Ask Webpage",
+                description="useful when you need to ask detailed questions about a webpage.",
+                args_schema=WebAskingInput
+            )
+        )
+    def handle_file_upload(self, files, chatbot, language):
+        """if the model accepts multi modal input, implement this function"""
+        status = gr.Markdown.update()
+        if files:
+            index = construct_index(self.api_key, file_src=files)
+            assert index is not None, "获取索引失败"
+            self.index = index
+            status = i18n("索引构建完成")
+            # Summarize the document
+            logging.info(i18n("生成内容总结中……"))
+            with get_openai_callback() as cb:
+                os.environ["OPENAI_API_KEY"] = self.api_key
+                from langchain.chains.summarize import load_summarize_chain
+                from langchain.prompts import PromptTemplate
+                from langchain.chat_models import ChatOpenAI
+                prompt_template = "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN " + language + ":"
+                PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
+                llm = ChatOpenAI()
+                chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
+                summary = chain({"input_documents": list(index.docstore.__dict__["_dict"].values())}, return_only_outputs=True)["output_text"]
+                logging.info(f"Summary: {summary}")
+                self.index_summary = summary
+            logging.info(cb)
+        return gr.Files.update(), chatbot, status
+    def query_index(self, query):
+        if self.index is not None:
+            retriever = self.index.as_retriever()
+            qa = RetrievalQA.from_chain_type(llm=self.llm, chain_type="stuff", retriever=retriever)
+            return qa.run(query)
+        else:
+            "Error during query."
+    def summary(self, text):
+        texts = Document(page_content=text)
+        texts = self.text_splitter.split_documents([texts])
+        return self.summarize_chain({"input_documents": texts}, return_only_outputs=True)["output_text"]
+    def fetch_url_content(self, url):
+        response = requests.get(url)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        # 提取所有的文本
+        text = ''.join(s.getText() for s in soup.find_all('p'))
+        logging.info(f"Extracted text from {url}")
+        return text
+    def summary_url(self, url):
+        text = self.fetch_url_content(url)
+        text_summary = self.summary(text)
+        url_content = "webpage content summary:\n" + text_summary
+        return url_content
+    def ask_url(self, url, question):
+        text = self.fetch_url_content(url)
+        texts = Document(page_content=text)
+        texts = self.text_splitter.split_documents([texts])
+        # use embedding
+        embeddings = OpenAIEmbeddings(openai_api_key=self.api_key)
+        # create vectorstore
+        db = FAISS.from_documents(texts, embeddings)
+        retriever = db.as_retriever()
+        qa = RetrievalQA.from_chain_type(llm=self.cheap_llm, chain_type="stuff", retriever=retriever)
+        return qa.run(f"{question} Reply in 中文")
+    def get_answer_at_once(self):
+        question = self.history[-1]["content"]
+        # llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
+        agent = initialize_agent(self.tools, self.llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
+        reply = agent.run(input=f"{question} Reply in 简体中文")
+        return reply, -1
+    def get_answer_stream_iter(self):
+        question = self.history[-1]["content"]
+        it = CallbackToIterator()
+        manager = BaseCallbackManager(handlers=[ChuanhuCallbackHandler(it.callback)])
+        def thread_func():
+            tools = self.tools
+            if self.index is not None:
+                    tools.append(
+                        Tool.from_function(
+                        func=self.query_index,
+                        name="Query Knowledge Base",
+                        description=f"useful when you need to know about: {self.index_summary}",
+                        args_schema=WebBrowsingInput
+                    )
+                )
+            agent = initialize_agent(self.tools, self.llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True, callback_manager=manager)
+            reply = agent.run(input=f"{question} Reply in 简体中文")
+            it.callback(reply)
+            it.finish()
+        t = Thread(target=thread_func)
+        t.start()
+        partial_text = ""
+        for value in it:
+            partial_text += value
+            yield partial_text

modules/models/PaLM.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from .base_model import BaseLLMModel, CallbackToIterator, ChuanhuCallbackHandler
+from langchain.chat_models import ChatGooglePalm
+import os
+class PaLM_Client(BaseLLMModel):
+    def __init__(self, model_name, user="") -> None:
+        super().__init__(model_name, user)
+        self.llm = ChatGooglePalm(google_api_key=os.environ["GOOGLE_PALM_API_KEY"])
+    def get_answer_at_once(self):
+        self.llm.generate(self.history)

modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc and b/modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc differ

modules/models/__pycache__/base_model.cpython-39.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/base_model.cpython-39.pyc and b/modules/models/__pycache__/base_model.cpython-39.pyc differ

modules/models/__pycache__/models.cpython-39.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/models.cpython-39.pyc and b/modules/models/__pycache__/models.cpython-39.pyc differ

modules/models/base_model.py CHANGED Viewed

@@ -18,12 +18,85 @@ import asyncio
 import aiohttp
 from enum import Enum
 from ..presets import *
-from ..llama_func import *
 from ..utils import *
 from .. import shared
 from ..config import retrieve_proxy
 class ModelType(Enum):
     Unknown = -1
@@ -34,6 +107,8 @@ class ModelType(Enum):
     StableLM = 4
     MOSS = 5
     YuanAI = 6
     @classmethod
     def get_type(cls, model_name: str):
@@ -53,6 +128,10 @@ class ModelType(Enum):
             model_type = ModelType.MOSS
         elif "yuanai" in model_name_lower:
             model_type = ModelType.YuanAI
         else:
             model_type = ModelType.Unknown
         return model_type
@@ -178,12 +257,12 @@ class BaseLLMModel:
         status_text = self.token_message()
         return chatbot, status_text
-    def handle_file_upload(self, files, chatbot):
         """if the model accepts multi modal input, implement this function"""
         status = gr.Markdown.update()
         if files:
-            construct_index(self.api_key, file_src=files)
-            status = "索引构建完成"
         return gr.Files.update(), chatbot, status
     def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot):
@@ -192,53 +271,20 @@ class BaseLLMModel:
         limited_context = False
         fake_inputs = real_inputs
         if files:
-            from llama_index.indices.vector_store.base_query import GPTVectorStoreIndexQuery
-            from llama_index.indices.query.schema import QueryBundle
             from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-            from langchain.chat_models import ChatOpenAI
-            from llama_index import (
-                GPTSimpleVectorIndex,
-                ServiceContext,
-                LangchainEmbedding,
-                OpenAIEmbedding,
-            )
             limited_context = True
             msg = "加载索引中……"
             logging.info(msg)
-            # yield chatbot + [(inputs, "")], msg
             index = construct_index(self.api_key, file_src=files)
             assert index is not None, "获取索引失败"
             msg = "索引获取成功，生成回答中……"
             logging.info(msg)
-            if local_embedding or self.model_type != ModelType.OpenAI:
-                embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name = "sentence-transformers/distiluse-base-multilingual-cased-v2"))
-            else:
-                embed_model = OpenAIEmbedding()
-            # yield chatbot + [(inputs, "")], msg
             with retrieve_proxy():
-                prompt_helper = PromptHelper(
-                    max_input_size=4096,
-                    num_output=5,
-                    max_chunk_overlap=20,
-                    chunk_size_limit=600,
-                )
-                from llama_index import ServiceContext
-                service_context = ServiceContext.from_defaults(
-                    prompt_helper=prompt_helper, embed_model=embed_model
-                )
-                query_object = GPTVectorStoreIndexQuery(
-                    index.index_struct,
-                    service_context=service_context,
-                    similarity_top_k=5,
-                    vector_store=index._vector_store,
-                    docstore=index._docstore,
-                    response_synthesizer=None
-                )
-                query_bundle = QueryBundle(real_inputs)
-                nodes = query_object.retrieve(query_bundle)
-            reference_results = [n.node.text for n in nodes]
-            reference_results = add_source_numbers(reference_results, use_source=False)
             display_append = add_details(reference_results)
             display_append = "\n\n" + "".join(display_append)
             real_inputs = (

 import aiohttp
 from enum import Enum
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.callbacks.manager import BaseCallbackManager
+from typing import Any, Dict, List, Optional, Union
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.input import print_text
+from langchain.schema import AgentAction, AgentFinish, LLMResult
+from threading import Thread, Condition
+from collections import deque
 from ..presets import *
+from ..index_func import *
 from ..utils import *
 from .. import shared
 from ..config import retrieve_proxy
+class CallbackToIterator:
+    def __init__(self):
+        self.queue = deque()
+        self.cond = Condition()
+        self.finished = False
+    def callback(self, result):
+        with self.cond:
+            self.queue.append(result)
+            self.cond.notify()  # Wake up the generator.
+    def __iter__(self):
+        return self
+    def __next__(self):
+        with self.cond:
+            while not self.queue and not self.finished:  # Wait for a value to be added to the queue.
+                self.cond.wait()
+            if not self.queue:
+                raise StopIteration()
+            return self.queue.popleft()
+    def finish(self):
+        with self.cond:
+            self.finished = True
+            self.cond.notify()  # Wake up the generator if it's waiting.
+class ChuanhuCallbackHandler(BaseCallbackHandler):
+    def __init__(self, callback) -> None:
+        """Initialize callback handler."""
+        self.callback = callback
+    def on_agent_action(
+        self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
+    ) -> Any:
+        self.callback(action.log)
+    def on_tool_end(
+        self,
+        output: str,
+        color: Optional[str] = None,
+        observation_prefix: Optional[str] = None,
+        llm_prefix: Optional[str] = None,
+        **kwargs: Any,
+    ) -> None:
+        """If not the final action, print out observation."""
+        if observation_prefix is not None:
+            self.callback(f"\n\n{observation_prefix}")
+        self.callback(output)
+        if llm_prefix is not None:
+            self.callback(f"\n\n{llm_prefix}")
+    def on_agent_finish(
+        self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
+    ) -> None:
+        self.callback(f"{finish.log}\n\n")
+    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        """Run on new LLM token. Only available when streaming is enabled."""
+        self.callback(token)
 class ModelType(Enum):
     Unknown = -1
     StableLM = 4
     MOSS = 5
     YuanAI = 6
+    ChuanhuAgent = 7
+    PaLM = 8
     @classmethod
     def get_type(cls, model_name: str):
             model_type = ModelType.MOSS
         elif "yuanai" in model_name_lower:
             model_type = ModelType.YuanAI
+        elif "川虎助理" in model_name_lower:
+            model_type = ModelType.ChuanhuAgent
+        elif "palm" in model_name_lower:
+            model_type = ModelType.PaLM
         else:
             model_type = ModelType.Unknown
         return model_type
         status_text = self.token_message()
         return chatbot, status_text
+    def handle_file_upload(self, files, chatbot, language):
         """if the model accepts multi modal input, implement this function"""
         status = gr.Markdown.update()
         if files:
+            index = construct_index(self.api_key, file_src=files)
+            status = i18n("索引构建完成")
         return gr.Files.update(), chatbot, status
     def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot):
         limited_context = False
         fake_inputs = real_inputs
         if files:
             from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+            from langchain.vectorstores.base import VectorStoreRetriever
             limited_context = True
             msg = "加载索引中……"
             logging.info(msg)
             index = construct_index(self.api_key, file_src=files)
             assert index is not None, "获取索引失败"
             msg = "索引获取成功，生成回答中……"
             logging.info(msg)
             with retrieve_proxy():
+                retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold",search_kwargs={"k":6, "score_threshold": 0.5})
+                relevant_documents = retriever.get_relevant_documents(real_inputs)
+            reference_results = [[d.page_content.strip("�"), os.path.basename(d.metadata["source"])] for d in relevant_documents]
+            reference_results = add_source_numbers(reference_results)
             display_append = add_details(reference_results)
             display_append = "\n\n" + "".join(display_append)
             real_inputs = (

modules/models/models.py CHANGED Viewed

@@ -22,7 +22,7 @@ from enum import Enum
 import uuid
 from ..presets import *
-from ..llama_func import *
 from ..utils import *
 from .. import shared
 from ..config import retrieve_proxy, usage_limit
@@ -494,7 +494,7 @@ class XMChat(BaseLLMModel):
         limited_context = False
         return limited_context, fake_inputs, display_append, real_inputs, chatbot
-    def handle_file_upload(self, files, chatbot):
         """if the model accepts multi modal input, implement this function"""
         if files:
             for file in files:
@@ -557,6 +557,7 @@ def get_model(
         config.local_embedding = True
     # del current_model.model
     model = None
     try:
         if model_type == ModelType.OpenAI:
             logging.info(f"正在加载OpenAI模型: {model_name}")
@@ -602,10 +603,15 @@ def get_model(
         elif model_type == ModelType.YuanAI:
             from .inspurai import Yuan_Client
             model = Yuan_Client(model_name, api_key=access_key, user_name=user_name, system_prompt=system_prompt)
         elif model_type == ModelType.Unknown:
             raise ValueError(f"未知模型: {model_name}")
         logging.info(msg)
-        chatbot = gr.Chatbot.update(label=model_name)
     except Exception as e:
         logging.error(e)
         msg = f"{STANDARD_ERROR_MSG}: {e}"

 import uuid
 from ..presets import *
+from ..index_func import *
 from ..utils import *
 from .. import shared
 from ..config import retrieve_proxy, usage_limit
         limited_context = False
         return limited_context, fake_inputs, display_append, real_inputs, chatbot
+    def handle_file_upload(self, files, chatbot, language):
         """if the model accepts multi modal input, implement this function"""
         if files:
             for file in files:
         config.local_embedding = True
     # del current_model.model
     model = None
+    chatbot = gr.Chatbot.update(label=model_name)
     try:
         if model_type == ModelType.OpenAI:
             logging.info(f"正在加载OpenAI模型: {model_name}")
         elif model_type == ModelType.YuanAI:
             from .inspurai import Yuan_Client
             model = Yuan_Client(model_name, api_key=access_key, user_name=user_name, system_prompt=system_prompt)
+        elif model_type == ModelType.ChuanhuAgent:
+            from .ChuanhuAgent import ChuanhuAgent_Client
+            model = ChuanhuAgent_Client(model_name, access_key, user_name=user_name)
+        elif model_type == ModelType.PaLM:
+            from .PaLM import PaLM_Client
+            model = PaLM_Client(model_name, user_name=user_name)
         elif model_type == ModelType.Unknown:
             raise ValueError(f"未知模型: {model_name}")
         logging.info(msg)
     except Exception as e:
         logging.error(e)
         msg = f"{STANDARD_ERROR_MSG}: {e}"

modules/overwrites.py CHANGED Viewed

@@ -1,24 +1,14 @@
 from __future__ import annotations
 import logging
-from llama_index import Prompt
 from typing import List, Tuple
 import mdtex2html
 from gradio_client import utils as client_utils
 from modules.presets import *
-from modules.llama_func import *
 from modules.config import render_latex
-def compact_text_chunks(self, prompt: Prompt, text_chunks: List[str]) -> List[str]:
-    logging.debug("Compacting text chunks...🚀🚀🚀")
-    combined_str = [c.strip() for c in text_chunks if c.strip()]
-    combined_str = [f"[{index+1}] {c}" for index, c in enumerate(combined_str)]
-    combined_str = "\n\n".join(combined_str)
-    # resplit based on self.max_chunk_overlap
-    text_splitter = self.get_text_splitter_given_prompt(prompt, 1, padding=1)
-    return text_splitter.split_text(combined_str)
 def postprocess(
         self,

 from __future__ import annotations
 import logging
 from typing import List, Tuple
 import mdtex2html
 from gradio_client import utils as client_utils
 from modules.presets import *
+from modules.index_func import *
 from modules.config import render_latex
 def postprocess(
         self,

modules/pdf_func.py CHANGED Viewed

@@ -1,11 +1,11 @@
 from types import SimpleNamespace
 import pdfplumber
 import logging
-from llama_index import Document
 def prepare_table_config(crop_page):
     """Prepare table查找边界, 要求page为原始page
     From https://github.com/jsvine/pdfplumber/issues/242
     """
     page = crop_page.root_page # root/parent
@@ -60,7 +60,7 @@ def get_title_with_cropped_page(first_page):
             title_bottom = word.bottom
         elif word.text == "Abstract": # 获取页面abstract
             top = word.top
     user_info = [i["text"] for i in extract_words(first_page.within_bbox((x0,title_bottom,x1,top)))]
     # 裁剪掉上半部分, within_bbox: full_included; crop: partial_included
     return title, user_info, first_page.within_bbox((x0,top,x1,bottom))
@@ -75,7 +75,7 @@ def get_column_cropped_pages(pages, two_column=True):
             new_pages.append(right)
         else:
             new_pages.append(page)
     return new_pages
 def parse_pdf(filename, two_column = True):
@@ -94,7 +94,7 @@ def parse_pdf(filename, two_column = True):
             name_top=name_top,
             name_bottom=name_bottom,
             record_chapter_name = True,
             page_start=page_start,
             page_stop=None,
@@ -114,7 +114,7 @@ def parse_pdf(filename, two_column = True):
                 if word.size >= 11: # 出现chapter name
                     if cur_chapter is None:
                         cur_chapter = create_chapter(page.page_number, word.top, word.bottom)
-                    elif not cur_chapter.record_chapter_name or (cur_chapter.name_bottom != cur_chapter.name_bottom and cur_chapter.name_top != cur_chapter.name_top):
                         # 不再继续写chapter name
                         cur_chapter.page_stop = page.page_number # stop id
                         chapters.append(cur_chapter)
@@ -143,7 +143,7 @@ def parse_pdf(filename, two_column = True):
         text += f"The {idx}th Chapter {chapter.name}: " + " ".join(chapter.text) + "\n"
     logging.getLogger().setLevel(level)
-    return Document(text=text, extra_info={"title": title})
 BASE_POINTS = """
 1. Who are the authors?

 from types import SimpleNamespace
 import pdfplumber
 import logging
+from langchain.docstore.document import Document
 def prepare_table_config(crop_page):
     """Prepare table查找边界, 要求page为原始page
     From https://github.com/jsvine/pdfplumber/issues/242
     """
     page = crop_page.root_page # root/parent
             title_bottom = word.bottom
         elif word.text == "Abstract": # 获取页面abstract
             top = word.top
     user_info = [i["text"] for i in extract_words(first_page.within_bbox((x0,title_bottom,x1,top)))]
     # 裁剪掉上半部分, within_bbox: full_included; crop: partial_included
     return title, user_info, first_page.within_bbox((x0,top,x1,bottom))
             new_pages.append(right)
         else:
             new_pages.append(page)
     return new_pages
 def parse_pdf(filename, two_column = True):
             name_top=name_top,
             name_bottom=name_bottom,
             record_chapter_name = True,
             page_start=page_start,
             page_stop=None,
                 if word.size >= 11: # 出现chapter name
                     if cur_chapter is None:
                         cur_chapter = create_chapter(page.page_number, word.top, word.bottom)
+                    elif not cur_chapter.record_chapter_name or (cur_chapter.name_bottom != cur_chapter.name_bottom and cur_chapter.name_top != cur_chapter.name_top):
                         # 不再继续写chapter name
                         cur_chapter.page_stop = page.page_number # stop id
                         chapters.append(cur_chapter)
         text += f"The {idx}th Chapter {chapter.name}: " + " ".join(chapter.text) + "\n"
     logging.getLogger().setLevel(level)
+    return Document(page_content=text, metadata={"title": title})
 BASE_POINTS = """
 1. Who are the authors?

modules/presets.py CHANGED Viewed

@@ -58,9 +58,9 @@ APPEARANCE_SWITCHER = """
 </div>
 """
-SUMMARIZE_PROMPT = "你是谁？我们刚才聊了什么？"  # 总结对话时的 prompt
 ONLINE_MODELS = [
     "gpt-3.5-turbo",
     "gpt-3.5-turbo-0301",
     "gpt-4",
@@ -68,6 +68,7 @@ ONLINE_MODELS = [
     "gpt-4-32k",
     "gpt-4-32k-0314",
     "xmchat",
     "yuanai-1.0-base_10B",
     "yuanai-1.0-translate",
     "yuanai-1.0-dialog",
@@ -164,6 +165,12 @@ Reply in {reply_language}
 If the context isn't useful, return the original answer.
 """
 ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
 small_and_beautiful_theme = gr.themes.Soft(

 </div>
 """
 ONLINE_MODELS = [
+    "川虎助理",
+    "川虎助理 Pro",
     "gpt-3.5-turbo",
     "gpt-3.5-turbo-0301",
     "gpt-4",
     "gpt-4-32k",
     "gpt-4-32k-0314",
     "xmchat",
+    "Google PaLM",
     "yuanai-1.0-base_10B",
     "yuanai-1.0-translate",
     "yuanai-1.0-dialog",
 If the context isn't useful, return the original answer.
 """
+SUMMARIZE_PROMPT = """Write a concise summary of the following:
+{text}
+CONCISE SUMMARY IN 中文:"""
 ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
 small_and_beautiful_theme = gr.themes.Soft(

requirements.txt CHANGED Viewed

@@ -8,11 +8,20 @@ tqdm
 colorama
 duckduckgo_search==2.9.5
 Pygments
-llama_index==0.5.25
-langchain<0.0.150
 markdown
 PyPDF2
 pdfplumber
 pandas
 commentjson
 openpyxl

 colorama
 duckduckgo_search==2.9.5
 Pygments
+langchain==0.0.170
 markdown
 PyPDF2
 pdfplumber
 pandas
 commentjson
 openpyxl
+pandoc
+wolframalpha
+faiss-cpu
+google-search-results
+arxiv
+wikipedia
+google.generativeai
+openai
+unstructured
+google-api-python-client