Spaces:

hyattDD
/

WordsApp

Sleeping

App Files Files Community

dht-tb16p committed on May 28

Commit

e60c070

•

1 Parent(s): 7a0d66d

Commit 1st version

Browse files

Files changed (46) hide show

__init__.py +0 -0
app.py +38 -0
chat.py +171 -0
config.py +1 -0
create_db.py +149 -0
database/__pycache__/create_db.cpython-311.pyc +0 -0
database/word_database.db +0 -0
embedding/__init__.py +1 -0
embedding/__pycache__/__init__.cpython-310.pyc +0 -0
embedding/__pycache__/__init__.cpython-311.pyc +0 -0
embedding/__pycache__/__init__.cpython-39.pyc +0 -0
embedding/__pycache__/call_embedding.cpython-310.pyc +0 -0
embedding/__pycache__/call_embedding.cpython-311.pyc +0 -0
embedding/__pycache__/call_embedding.cpython-39.pyc +0 -0
embedding/__pycache__/zhipuai_embedding.cpython-310.pyc +0 -0
embedding/__pycache__/zhipuai_embedding.cpython-311.pyc +0 -0
embedding/__pycache__/zhipuai_embedding.cpython-39.pyc +0 -0
embedding/call_embedding.py +20 -0
embedding/zhipuai_embedding.py +112 -0
llm/__pycache__/call_llm.cpython-310.pyc +0 -0
llm/__pycache__/call_llm.cpython-311.pyc +0 -0
llm/__pycache__/call_llm.cpython-39.pyc +0 -0
llm/__pycache__/self_llm.cpython-310.pyc +0 -0
llm/__pycache__/self_llm.cpython-311.pyc +0 -0
llm/__pycache__/self_llm.cpython-39.pyc +0 -0
llm/__pycache__/spark_llm.cpython-310.pyc +0 -0
llm/__pycache__/spark_llm.cpython-311.pyc +0 -0
llm/__pycache__/spark_llm.cpython-39.pyc +0 -0
llm/__pycache__/wenxin_llm.cpython-310.pyc +0 -0
llm/__pycache__/wenxin_llm.cpython-311.pyc +0 -0
llm/__pycache__/wenxin_llm.cpython-39.pyc +0 -0
llm/__pycache__/wenxin_llm_.cpython-310.pyc +0 -0
llm/__pycache__/zhipuai_llm.cpython-310.pyc +0 -0
llm/__pycache__/zhipuai_llm.cpython-311.pyc +0 -0
llm/__pycache__/zhipuai_llm.cpython-39.pyc +0 -0
llm/call_llm.py +330 -0
llm/self_llm.py +47 -0
llm/spark_llm.py +227 -0
llm/test.ipynb +312 -0
llm/wenxin_llm.py +90 -0
llm/zhipuai_llm.py +217 -0
prompts.py +84 -0
requirements.txt +157 -0
test/__pycache__/test_create_db.cpython-311-pytest-8.2.0.pyc +0 -0
test/test_create_db.py +44 -0
words_db.py +67 -0

__init__.py ADDED Viewed

File without changes

app.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import gradio as gr
+import openai
+from create_db import create_db
+from chat import respond
+openai.api_base = "https://api.v36.cm/v1"  # global setting is needed
+def launch_app():
+    """Build the Gradio interface of the vocabulary-learning tool and launch it.
+    The wide left column holds the chat area (wired to `respond`); the narrow
+    right column holds the file/text inputs used to build the personal word
+    base (wired to `create_db`).
+    """
+    with gr.Blocks() as demo:
+        with gr.Row(equal_height=True):
+            gr.Markdown("## 英语单词学习工具")
+        with gr.Row():
+            # left column: chat history, input box, submit and clear buttons
+            with gr.Column(scale=4):
+                chatbot = gr.Chatbot(height=400)
+                msg = gr.Textbox(label="在此输入指令（以:起始）或对话")
+                btn = gr.Button("Submit")
+                gr.ClearButton(components=[msg, chatbot], value="清除对话")
+                # button click and Enter in the textbox trigger the same handler;
+                # `respond` returns (new textbox value, updated chat history)
+                btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
+                msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
+            # right column: sources for building the word base
+            with gr.Column(scale=1):
+                # NOTE(review): '.md' is offered here — confirm the file parser accepts it
+                file = gr.File(label='请导入制作词库的文件', file_count='single', file_types=['.md', '.pdf'])
+                with gr.Row():
+                    init_vocab_by_file = gr.Button("使用文件生成个人词库")
+                text = gr.Textbox(label="在此粘贴制作词库的文本", lines=8)
+                with gr.Row():
+                    init_vocab_by_text = gr.Button("使用文本生成个人词库")
+            # both buttons call `create_db`, which appends its report to the chat history
+            init_vocab_by_file.click(create_db, inputs=[file, chatbot], outputs=[chatbot])
+            init_vocab_by_text.click(create_db, inputs=[text, chatbot], outputs=[chatbot])
+    # close any Gradio apps that are still running before launching this one
+    gr.close_all()
+    demo.launch()
+if __name__ == "__main__":
+    launch_app()

chat.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import re
+from typing import List
+from loguru import logger
+import config
+from llm.call_llm import get_completion, get_completion_from_messages
+from words_db import words_db
+from create_db import get_similar_k_words
+from prompts import trans_prompt, query_prompt, learn_prompt
+from prompts import system_message_mapper
+def format_common_prompt(raw_prompt, variable):
+    """get format prompt by repalce variable in raw_prompt
+    """
+    return raw_prompt.format(variable)
+def format_chat_prompt(message, chat_history) -> str:
+    """get format prompt
+    """
+    prompt = ""
+    for turn in chat_history:  # add history info
+        user_message, bot_message = turn
+        prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}"
+    prompt = f"{prompt}\nUser: {message}\nAssistant:"
+    return prompt
+def respond(message, chat_history,
+            llm="gpt-3.5-turbo", history_len=3, temperature=0.1, max_tokens=2048):
+    """get respond from LLM
+    """
+    # deal with commands
+    respond_message = command_parser(message)
+    if respond_message:
+        chat_history.append((message, respond_message))
+        respond_message = ""
+        return respond_message, chat_history
+    # map natural language to command
+    respond_message = command_mapper(message)
+    if respond_message:
+        chat_history.append((message, respond_message))
+        respond_message = ""
+        return respond_message, chat_history
+    # no commands return, so chat with LLM
+    if message is None or len(message) < 1:
+            return "", chat_history
+    try:
+        chat_history = chat_history[-history_len:] if history_len > 0 else []  # constrain history length
+        formatted_prompt = format_chat_prompt(message, chat_history)  # format prompt
+        bot_message = get_completion(
+            formatted_prompt,
+            llm,
+            api_key=config.api_key,
+            temperature=temperature, max_tokens=max_tokens)
+        bot_message = re.sub(r"\\n", '<br/>', bot_message) # replace \n with <br/>
+        chat_history.append((message, bot_message))
+        return "", chat_history
+    except Exception as e:
+        return e, chat_history
+def command_parser(input: str) -> str:
+    """parse 4 type commands
+    1. :add
+    2. :remove
+    3. :learn
+    4. :query
+    return info of action to user
+    """
+    if input.startswith(":add"):
+        words = input.split(" ")[1:]
+        info = add_words(words)
+        return info
+    if input.startswith(":remove"):
+        words = input.split(" ")[1:]
+        info = remove_words(words)
+        return info
+    if input.startswith(":learn"):
+        if len(input.split(" ")) != 2:
+            return "学习模式将基于词库进行，请指定一个query单词"
+        query = input.split(" ")[1]
+        info = learn_words(query)
+        return f"Based on your query word: {query} and dictionary, learning sentence is:\n{info}"
+    if input.startswith(":query"):
+        if len(input.split(" ")) > 2:
+            return "查询模式仅支持单个单词，请使用:query <word>进行查询"
+        word = input.split(" ")[1]
+        info = query_word(word)
+        return f"{word}\n{info}"
+    if input.startswith(":show"):
+        info = show_all_words()
+        return info
+    if input.startswith(":help"):
+        return "目前支持的指令有:\n:add <word1> <word2> ...\n:remove <word1> <word2> ...\n:learn <query_word>\n:query <word>"
+    return ""
+def show_all_words() -> str:
+    """show all words in db
+    """
+    try:
+        all_words = words_db.query_word()
+        return f"目前词库中的所有单词:\n{all_words}"
+    except Exception as e:
+        logger.error(str(e))
+        return "查询失败"
+def add_words(input: List[str]):
+    word_tuple_list = [
+        (word, get_completion(
+            prompt=format_common_prompt(trans_prompt, word),
+            api_key=config.api_key))
+        for word in input
+    ]
+    try:
+        for word_tuple in word_tuple_list:
+            word, definition = word_tuple
+            words_db.add_word(word, definition)
+            logger.info(f"已经添加单词: {word} 和其释义: {definition}")
+    except Exception as e:
+        logger.error(str(e))
+        return f"添加单词失败: {input}"
+    return f"已添加单词: {input}"
+def remove_words(input: List[str]):
+    try:
+        for word in input:
+            words_db.delete_word(word)
+            logger.info(f"已经删除单词: {word} 和其释义")
+    except Exception as e:
+        logger.error(str(e))
+        return f"删除单词失败: {input}"
+    return f"已删除单词: {input}"
+def learn_words(query_word) -> str:
+    # get top 3 words from vec db and generate material
+    words = get_similar_k_words(query_word)
+    respond = get_completion(
+        prompt=format_common_prompt(learn_prompt, words),
+        api_key=config.api_key)
+    logger.info(f"进入学习模式，学习下列单词: {words}")
+    return respond
+def query_word(input: str) -> str:
+    # just query infomation about a word from gpt
+    respond = get_completion(
+        prompt=format_common_prompt(query_prompt, input),
+        api_key=config.api_key)
+    logger.info(f"查询单词: {input}")
+    return respond
+def command_mapper(input: str) -> str:
+    """map natural language to command, return command function
+    """
+    user_message = input
+    messages =  [
+        {'role':'system',
+        'content': system_message_mapper},
+        {'role':'user',
+        'content': f"{user_message}"},
+    ]
+    respond = get_completion_from_messages(messages, api_key=config)
+    mapped_command = command_parser(respond)
+    logger.info(f"用户输入: {user_message}\n指令解析器输出: {mapped_command}")
+    return mapped_command

config.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ api_key = "sk-wmeFmoPXQ9wlJKiP48B0F028E6534359A6980b9585Ba5bAc"

create_db.py ADDED Viewed

	@@ -0,0 +1,149 @@

+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+import tempfile
+import config
+import nltk
+from typing import List
+from nltk.corpus import words
+from loguru import logger
+from llm.call_llm import get_completion_from_messages
+from embedding.call_embedding import get_embedding
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import PyMuPDFLoader
+from langchain.vectorstores import Chroma
+from prompts import system_message_select
+WORDS_DB_PATH = "../words_db"
+VECTOR_DB_PATH = "./vector_db/chroma"
+def parse_file(file_path):
+    docs = []
+    # check file type
+    file_type = file_path.split('.')[-1]
+    if file_type == 'pdf':
+        loader = PyMuPDFLoader(file_path)
+        content = loader.load()
+        docs.extend(content)
+    else:
+        return "File type not supported"
+    if len(docs) > 5:
+        return "File too large, please select a pdf file with less than 5 pages"
+    slices = split_text(docs)  # split content into slices
+    words = extract_words(slices)  # extract words from slices
+    try:
+        vectorize_words(words)   # store words into vector database
+    except Exception as e:
+        logger.error(e)
+    return ""
+def parse_text(input: str):
+    content = input
+    return content
+def split_text(docs: List[object]):
+    """Split text into slices"""
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size = 1500,
+        chunk_overlap = 150
+    )
+    splits = text_splitter.split_documents(docs)
+    logger.info(f"Split {len(docs)} pages document into {len(splits)} slices")
+    return splits
+def extract_words(splits: List[object]):
+    """Extract words from slices"""
+    all_words = []
+    for slice in splits:
+        tmp_content = slice.page_content
+        messages =  [
+            {'role':'system',
+            'content': system_message_select},
+            {'role':'user',
+            'content': f"{tmp_content}"},
+        ]
+        respond = get_completion_from_messages(messages, api_key=config.api_key)
+        words_list = respond.split(", ")
+        if len(words_list) == 0:
+            continue
+        else:
+            all_words.extend(words_list)
+    all_words = wash_words(all_words)
+    logger.info(f"Extract {len(all_words)} words from slices")
+    return all_words
+def wash_words(input_words: list[str]):
+    """Wash words into a list of correct english words"""
+    words_list = [word for word in input_words
+                      if len(word) >= 3 and len(word) <= 30]
+    nltk.download('words')
+    english_words = set(words.words())
+    filtered_words = [word.lower() for word in words_list if word.lower() in english_words]
+    filtered_words = list(set(filtered_words))
+    logger.info(f"Wash {len(filtered_words)} words into a list of correct english words")
+    return filtered_words
+def get_words_from_text(input: str):
+    words = input.split(' ')
+    return words
+def store_words(input: str, db_path=WORDS_DB_PATH):
+    """Store words into database.
+    Not implemented yet: the body is a no-op and both arguments are ignored.
+    """
+    pass
+def vectorize_words(input: list[str], embedding=None):
+    """Vectorize words into vectors"""
+    model = get_embedding("openai", embedding_key=config.api_key)
+    persist_path = VECTOR_DB_PATH
+    vectordb = Chroma.from_texts(
+        texts=input,
+        embedding=model,
+        persist_directory=persist_path
+    )
+    vectordb.persist()
+    logger.info(f"Vectorized {len(input)} words into vectors")
+    return vectordb
+def get_similar_k_words(query_word, k=3) -> List[str]:
+    # get 3 simlilar words from DB
+    model = get_embedding("openai", embedding_key=config.api_key)
+    vectordb = Chroma(persist_directory=VECTOR_DB_PATH, embedding_function=model)
+    similar_words = vectordb.max_marginal_relevance_search(query_word, k=k)
+    similar_words = [word.page_content for word in similar_words]
+    logger.info(f"Get {k} similar words {similar_words} from DB")
+    return similar_words
+def create_db(input, chat_history):
+    """The input is file or text"""
+    action_msg = ""  # the description of user action: put file or text into database
+    # 1. for file upload
+    if isinstance(input, tempfile._TemporaryFileWrapper):
+        tmp_file_path = input.name
+        file_name = tmp_file_path.split('/')[-1]
+        action_msg = f"Add words from my file: {file_name} to database"
+        try:
+            parse_file(tmp_file_path) #TODO
+            output = f"Words from your file: {file_name} has been added to database"
+        except Exception as e:
+            logger.error(e)
+            output = f"Error: failed to use your file: {file_name} generate dictionary"
+    # 2. for text input
+    elif isinstance(input, str):
+        action_msg = f"Add words from my text: {input} to database"
+        try:
+            parse_text(input)  #TODO
+            output = f"Words from your text: {input} has been added to database"
+        except Exception as e:
+            logger.error(e)
+            output = f"Error: failed to use your text: {input} generate dictionary"
+    chat_history.append((action_msg, output))
+    return chat_history
+if __name__ == "__main__":
+    create_db(embeddings="m3e")

database/__pycache__/create_db.cpython-311.pyc ADDED Viewed

Binary file (3.33 kB). View file

database/word_database.db ADDED Viewed

Binary file (8.19 kB). View file

embedding/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # from .zhipuai_embedding import ZhipuAIEmbeddings

embedding/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (163 Bytes). View file

embedding/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (158 Bytes). View file

embedding/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (152 Bytes). View file

embedding/__pycache__/call_embedding.cpython-310.pyc ADDED Viewed

Binary file (956 Bytes). View file

embedding/__pycache__/call_embedding.cpython-311.pyc ADDED Viewed

Binary file (1.48 kB). View file

embedding/__pycache__/call_embedding.cpython-39.pyc ADDED Viewed

Binary file (937 Bytes). View file

embedding/__pycache__/zhipuai_embedding.cpython-310.pyc ADDED Viewed

Binary file (4.23 kB). View file

embedding/__pycache__/zhipuai_embedding.cpython-311.pyc ADDED Viewed

Binary file (5.45 kB). View file

embedding/__pycache__/zhipuai_embedding.cpython-39.pyc ADDED Viewed

Binary file (4.2 kB). View file

embedding/call_embedding.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+from embedding.zhipuai_embedding import ZhipuAIEmbeddings
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.embeddings.openai import OpenAIEmbeddings
+from llm.call_llm import parse_llm_api_key
+def get_embedding(embedding: str, embedding_key: str=None, env_file: str=None):
+    if embedding == 'm3e':
+        return HuggingFaceEmbeddings(model_name="moka-ai/m3e-base")
+    if embedding_key is None:
+        embedding_key = parse_llm_api_key(embedding)
+    if embedding == "openai":
+        return OpenAIEmbeddings(openai_api_key=embedding_key)
+    elif embedding == "zhipuai":
+        return ZhipuAIEmbeddings(zhipuai_api_key=embedding_key)
+    else:
+        raise ValueError(f"embedding {embedding} not support ")

embedding/zhipuai_embedding.py ADDED Viewed

	@@ -0,0 +1,112 @@

+from __future__ import annotations
+import logging
+from typing import Any, Dict, List, Optional
+from langchain.embeddings.base import Embeddings
+from langchain.pydantic_v1 import BaseModel, root_validator
+from langchain.utils import get_from_dict_or_env
+logger = logging.getLogger(__name__)
+class ZhipuAIEmbeddings(BaseModel, Embeddings):
+    """`Zhipuai Embeddings` embedding models."""
+    zhipuai_api_key: Optional[str] = None
+    """Zhipuai application apikey"""
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """
+        Validate whether zhipuai_api_key in the environment variables or
+        configuration file are available or not.
+        Args:
+            values: a dictionary containing configuration information, must include the
+            fields of zhipuai_api_key
+        Returns:
+            a dictionary containing configuration information. If zhipuai_api_key
+            are not provided in the environment variables or configuration
+            file, the original values will be returned; otherwise, values containing
+            zhipuai_api_key will be returned.
+        Raises:
+            ValueError: zhipuai package not found, please install it with `pip install
+            zhipuai`
+        """
+        values["zhipuai_api_key"] = get_from_dict_or_env(
+            values,
+            "zhipuai_api_key",
+            "ZHIPUAI_API_KEY",
+        )
+        try:
+            import zhipuai
+            zhipuai.api_key = values["zhipuai_api_key"]
+            values["client"] = zhipuai.model_api
+        except ImportError:
+            raise ValueError(
+                "Zhipuai package not found, please install it with "
+                "`pip install zhipuai`"
+            )
+        return values
+    def _embed(self, texts: str) -> List[float]:
+        # send request
+        try:
+            resp = self.client.invoke(
+                model="text_embedding",
+                prompt=texts
+            )
+        except Exception as e:
+            raise ValueError(f"Error raised by inference endpoint: {e}")
+        if resp["code"] != 200:
+            raise ValueError(
+                "Error raised by inference API HTTP code: %s, %s"
+                % (resp["code"], resp["msg"])
+            )
+        embeddings = resp["data"]["embedding"]
+        return embeddings
+    def embed_query(self, text: str) -> List[float]:
+        """
+        Embedding a text.
+        Args:
+            Text (str): A text to be embedded.
+        Return:
+            List [float]: An embedding list of input text, which is a list of floating-point values.
+        """
+        resp = self.embed_documents([text])
+        return resp[0]
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """
+        Embeds a list of text documents.
+        Args:
+            texts (List[str]): A list of text documents to embed.
+        Returns:
+            List[List[float]]: A list of embeddings for each document in the input list.
+                            Each embedding is represented as a list of float values.
+        """
+        return [self._embed(text) for text in texts]
+    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Asynchronous Embed search docs."""
+        raise NotImplementedError(
+            "Please use `embed_documents`. Official does not support asynchronous requests")
+    async def aembed_query(self, text: str) -> List[float]:
+        """Asynchronous Embed query text."""
+        raise NotImplementedError(
+            "Please use `aembed_query`. Official does not support asynchronous requests")

llm/__pycache__/call_llm.cpython-310.pyc ADDED Viewed

Binary file (8.17 kB). View file

llm/__pycache__/call_llm.cpython-311.pyc ADDED Viewed

Binary file (14.1 kB). View file

llm/__pycache__/call_llm.cpython-39.pyc ADDED Viewed

Binary file (8.17 kB). View file

llm/__pycache__/self_llm.cpython-310.pyc ADDED Viewed

Binary file (1.57 kB). View file

llm/__pycache__/self_llm.cpython-311.pyc ADDED Viewed

Binary file (2.11 kB). View file

llm/__pycache__/self_llm.cpython-39.pyc ADDED Viewed

Binary file (1.56 kB). View file

llm/__pycache__/spark_llm.cpython-310.pyc ADDED Viewed

Binary file (6.17 kB). View file

llm/__pycache__/spark_llm.cpython-311.pyc ADDED Viewed

Binary file (10.5 kB). View file

llm/__pycache__/spark_llm.cpython-39.pyc ADDED Viewed

Binary file (6.12 kB). View file

llm/__pycache__/wenxin_llm.cpython-310.pyc ADDED Viewed

Binary file (2.93 kB). View file

llm/__pycache__/wenxin_llm.cpython-311.pyc ADDED Viewed

Binary file (4.4 kB). View file

llm/__pycache__/wenxin_llm.cpython-39.pyc ADDED Viewed

Binary file (2.99 kB). View file

llm/__pycache__/wenxin_llm_.cpython-310.pyc ADDED Viewed

Binary file (3.76 kB). View file

llm/__pycache__/zhipuai_llm.cpython-310.pyc ADDED Viewed

Binary file (5.86 kB). View file

llm/__pycache__/zhipuai_llm.cpython-311.pyc ADDED Viewed

Binary file (8.4 kB). View file

llm/__pycache__/zhipuai_llm.cpython-39.pyc ADDED Viewed

Binary file (5.6 kB). View file

llm/call_llm.py ADDED Viewed

	@@ -0,0 +1,330 @@

+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@File    :   call_llm.py
+@Time    :   2023/10/18 10:45:00
+@Author  :   Logan Zou
+@Version :   1.0
+@Contact :   loganzou0421@163.com
+@License :   (C)Copyright 2017-2018, Liugroup-NLPR-CASIA
+@Desc    :   将各个大模型的原生接口封装在一个接口
+'''
+import openai
+import json
+import requests
+import _thread as thread
+import base64
+# import datetime
+from dotenv import load_dotenv, find_dotenv
+import hashlib
+import hmac
+import os
+import queue
+from urllib.parse import urlparse
+import ssl
+from datetime import datetime
+from time import mktime
+from urllib.parse import urlencode
+from wsgiref.handlers import format_date_time
+import zhipuai
+from langchain.utils import get_from_dict_or_env
+import websocket  # 使用websocket_client
+def get_completion(prompt :str, model="gpt-3.5-turbo", temperature=0.1,api_key=None, secret_key=None, access_token=None, appid=None, api_secret=None, max_tokens=2048):
+    # 调用大模型获取回复，支持上述三种模型+gpt
+    # arguments:
+    # prompt: 输入提示
+    # model：模型名
+    # temperature: 温度系数
+    # api_key：如名
+    # secret_key, access_token：调用文心系列模型需要
+    # appid, api_secret: 调用星火系列模型需要
+    # max_tokens : 返回最长序列
+    # return: 模型返回，字符串
+    # 调用 GPT
+    if model in ["gpt-3.5-turbo", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0613", "gpt-4", "gpt-4-32k"]:
+        return get_completion_gpt(prompt, model, temperature, api_key, max_tokens)
+    elif model in ["ERNIE-Bot", "ERNIE-Bot-4", "ERNIE-Bot-turbo"]:
+        return get_completion_wenxin(prompt, model, temperature, api_key, secret_key)
+    elif model in ["Spark-1.5", "Spark-2.0"]:
+        return get_completion_spark(prompt, model, temperature, api_key, appid, api_secret, max_tokens)
+    elif model in ["chatglm_pro", "chatglm_std", "chatglm_lite"]:
+        return get_completion_glm(prompt, model, temperature, api_key, max_tokens)
+    else:
+        return "不正确的模型"
+def get_completion_gpt(prompt : str, model : str, temperature : float, api_key:str, max_tokens:int):
+    # 封装 OpenAI 原生接口
+    if api_key is None:
+        api_key = parse_llm_api_key("openai")
+    openai.api_key = api_key
+    # 具体调用
+    messages = [{"role": "user", "content": prompt}]
+    response = openai.ChatCompletion.create(
+        model=model,
+        messages=messages,
+        temperature=temperature, # 模型输出的温度系数，控制输出的随机程度
+        max_tokens = max_tokens, # 回复最大长度
+    )
+    # 调用 OpenAI 的 ChatCompletion 接口
+    return response.choices[0].message["content"]
+def get_completion_from_messages(messages, api_key, temperature=0):
+    # 封装 OpenAI 原生接口
+    if api_key is None:
+        api_key = parse_llm_api_key("openai")
+    openai.api_key = api_key
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        temperature=temperature, # 控制模型输出的随机程度
+    )
+    return response.choices[0].message["content"]
+def get_access_token(api_key, secret_key):
+    """
+    使用 API Key，Secret Key 获取access_token，替换下列示例中的应用API Key、应用Secret Key
+    """
+    # 指定网址
+    url = f"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={api_key}&client_secret={secret_key}"
+    # 设置 POST 访问
+    payload = json.dumps("")
+    headers = {
+        'Content-Type': 'application/json',
+        'Accept': 'application/json'
+    }
+    # 通过 POST 访问获取账户对应的 access_token
+    response = requests.request("POST", url, headers=headers, data=payload)
+    return response.json().get("access_token")
+def get_completion_wenxin(prompt : str, model : str, temperature : float, api_key:str, secret_key : str):
+    # 封装百度文心原生接口
+    if api_key is None or secret_key is None:
+        api_key, secret_key = parse_llm_api_key("wenxin")
+    # 获取access_token
+    access_token = get_access_token(api_key, secret_key)
+    # 调用接口
+    url = f"https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant?access_token={access_token}"
+    # 配置 POST 参数
+    payload = json.dumps({
+        "messages": [
+            {
+                "role": "user",# user prompt
+                "content": "{}".format(prompt)# 输入的 prompt
+            }
+        ]
+    })
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    # 发起请求
+    response = requests.request("POST", url, headers=headers, data=payload)
+    # 返回的是一个 Json 字符串
+    js = json.loads(response.text)
+    return js["result"]
+def get_completion_spark(prompt : str, model : str, temperature : float, api_key:str, appid : str, api_secret : str, max_tokens : int):
+    if api_key is None or appid is None and api_secret is None:
+        api_key, appid, api_secret = parse_llm_api_key("spark")
+    # 配置 1.5 和 2 的不同环境
+    if model == "Spark-1.5":
+        domain = "general"
+        Spark_url = "ws://spark-api.xf-yun.com/v1.1/chat"  # v1.5环境的地址
+    else:
+        domain = "generalv2"    # v2.0版本
+        Spark_url = "ws://spark-api.xf-yun.com/v2.1/chat"  # v2.0环境的地址
+    question = [{"role":"user", "content":prompt}]
+    response = spark_main(appid,api_key,api_secret,Spark_url,domain,question,temperature,max_tokens)
+    return response
+def get_completion_glm(prompt : str, model : str, temperature : float, api_key:str, max_tokens : int):
+    # 获取GLM回答
+    if api_key is None:
+        api_key = parse_llm_api_key("zhipuai")
+    zhipuai.api_key = api_key
+    response = zhipuai.model_api.invoke(
+        model=model,
+        prompt=[{"role":"user", "content":prompt}],
+        temperature = temperature,
+        max_tokens=max_tokens
+        )
+    return response["data"]["choices"][0]["content"].strip('"').strip(" ")
+# def getText(role, content, text = []):
+#     # role 是指定角色，content 是 prompt 内容
+#     jsoncon = {}
+#     jsoncon["role"] = role
+#     jsoncon["content"] = content
+#     text.append(jsoncon)
+#     return text
+# 星火 API 调用使用
+answer = ""
+class Ws_Param(object):
+    """Holds the Spark credentials and endpoint and builds the signed websocket URL."""
+    # initialisation
+    def __init__(self, APPID, APIKey, APISecret, Spark_url):
+        self.APPID = APPID
+        self.APIKey = APIKey
+        self.APISecret = APISecret
+        # host and path of the endpoint are both part of the signed string
+        self.host = urlparse(Spark_url).netloc
+        self.path = urlparse(Spark_url).path
+        self.Spark_url = Spark_url
+        # custom defaults (not read by any method of this class)
+        self.temperature = 0
+        self.max_tokens = 2048
+    # build the url
+    def create_url(self):
+        """Return `Spark_url` with the authorization, date and host query parameters appended."""
+        # timestamp in RFC 1123 format
+        now = datetime.now()
+        date = format_date_time(mktime(now.timetuple()))
+        # string to sign: host line, date line, request line
+        signature_origin = "host: " + self.host + "\n"
+        signature_origin += "date: " + date + "\n"
+        signature_origin += "GET " + self.path + " HTTP/1.1"
+        # sign it with HMAC-SHA256 keyed by the API secret
+        signature_sha = hmac.new(self.APISecret.encode('utf-8'), signature_origin.encode('utf-8'),
+                                 digestmod=hashlib.sha256).digest()
+        signature_sha_base64 = base64.b64encode(signature_sha).decode(encoding='utf-8')
+        authorization_origin = f'api_key="{self.APIKey}", algorithm="hmac-sha256", headers="host date request-line", signature="{signature_sha_base64}"'
+        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')
+        # collect the authentication parameters in a dict
+        v = {
+            "authorization": authorization,
+            "date": date,
+            "host": self.host
+        }
+        # append the url-encoded authentication parameters to the endpoint
+        url = self.Spark_url + '?' + urlencode(v)
+        # when debugging, print this url and compare it with one generated from the same parameters
+        return url
+# websocket error callback
+def on_error(ws, error):
+    """Print the error reported by the websocket."""
+    print("### error:", error)
+# websocket close callback
+def on_close(ws,one,two):
+    """Print a single space when the websocket closes; the arguments are ignored."""
+    print(" ")
+# websocket open callback
+def on_open(ws):
+    """Start a new thread running `run(ws)` once the connection is established."""
+    thread.start_new_thread(run, (ws,))
+def run(ws, *args):
+    """Build the request from the attributes stored on `ws` and send it as JSON."""
+    data = json.dumps(gen_params(appid=ws.appid, domain= ws.domain,question=ws.question, temperature = ws.temperature, max_tokens = ws.max_tokens))
+    ws.send(data)
+# websocket message callback
+def on_message(ws, message):
+    """Print each streamed content piece and append it to the module-level `answer`.
+    Closes the socket on a non-zero header code or when the status equals 2.
+    """
+    data = json.loads(message)
+    code = data['header']['code']
+    if code != 0:
+        # non-zero code: report the request error and stop
+        print(f'请求错误: {code}, {data}')
+        ws.close()
+    else:
+        choices = data["payload"]["choices"]
+        status = choices["status"]
+        content = choices["text"][0]["content"]
+        print(content,end ="")
+        global answer
+        answer += content
+        # presumably status 2 marks the final message — confirm against the Spark API
+        if status == 2:
+            ws.close()
+def gen_params(appid, domain,question, temperature, max_tokens):
+    """
+    通过appid和用户的提问来生成请参数
+    """
+    data = {
+        "header": {
+            "app_id": appid,
+            "uid": "1234"
+        },
+        "parameter": {
+            "chat": {
+                "domain": domain,
+                "random_threshold": 0.5,
+                "max_tokens": max_tokens,
+                "temperature" : temperature,
+                "auditing": "default"
+            }
+        },
+        "payload": {
+            "message": {
+                "text": question
+            }
+        }
+    }
+    return data
+def spark_main(appid, api_key, api_secret, Spark_url,domain, question, temperature, max_tokens):
+    """
+    Send one question over a websocket connection and return the collected reply text.
+
+    Builds a signed URL with Ws_Param.create_url(), creates a websocket.WebSocketApp
+    on it, attaches the request parameters to the app object as attributes and calls
+    run_forever(). The nested on_message handler queues each message's content; the
+    queued pieces are joined in arrival order after run_forever() returns.
+
+    Returns:
+        The concatenated content strings, or an empty string if nothing was queued
+        (for example when the first message carried a non-zero header code).
+    """
+    # print("Spark:")
+    output_queue = queue.Queue()
+    def on_message(ws, message):
+        data = json.loads(message)
+        code = data['header']['code']
+        if code != 0:
+            # Non-zero header code is treated as a request error: report it and close
+            print(f'请求错误: {code}, {data}')
+            ws.close()
+        else:
+            choices = data["payload"]["choices"]
+            status = choices["status"]
+            content = choices["text"][0]["content"]
+            # print(content, end='')
+            # Put the output chunk into the queue
+            output_queue.put(content)
+            # presumably status 2 marks the final chunk -- TODO confirm against the Spark API docs
+            if status == 2:
+                ws.close()
+    wsParam = Ws_Param(appid, api_key, api_secret, Spark_url)
+    websocket.enableTrace(False)
+    wsUrl = wsParam.create_url()
+    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close, on_open=on_open)
+    # Stash the request parameters on the app object; presumably read by the on_open/run handlers
+    ws.appid = appid
+    ws.question = question
+    ws.domain = domain
+    ws.temperature = temperature
+    ws.max_tokens = max_tokens
+    # NOTE(review): cert_reqs=ssl.CERT_NONE turns off TLS certificate verification -- confirm this is intended
+    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
+    return ''.join([output_queue.get() for _ in range(output_queue.qsize())])
+def parse_llm_api_key(model:str, env_file:dict()=None):
+    """
+    通过 model 和 env_file 的来解析平台参数
+    """
+    if env_file is None:
+        _ = load_dotenv(find_dotenv())
+        env_file = os.environ
+    if model == "openai":
+        return env_file["OPENAI_API_KEY"]
+    elif model == "wenxin":
+        return env_file["wenxin_api_key"], env_file["wenxin_secret_key"]
+    elif model == "spark":
+        return env_file["spark_api_key"], env_file["spark_appid"], env_file["spark_api_secret"]
+    elif model == "zhipuai":
+        return get_from_dict_or_env(env_file, "zhipuai_api_key", "ZHIPUAI_API_KEY")
+        # return env_file["ZHIPUAI_API_KEY"]
+    else:
+        raise ValueError(f"model{model} not support!!!")

llm/self_llm.py ADDED Viewed

	@@ -0,0 +1,47 @@

+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@File    :   self_llm.py
+@Time    :   2023/10/16 18:48:08
+@Author  :   Logan Zou
+@Version :   1.0
+@Contact :   loganzou0421@163.com
+@License :   (C)Copyright 2017-2018, Liugroup-NLPR-CASIA
+@Desc    :   在 LangChain LLM 基础上封装的项目类，统一了 GPT、文心、讯飞、智谱多种 API 调用
+'''
+from langchain.llms.base import LLM
+from typing import Dict, Any, Mapping
+from pydantic import Field
+class Self_LLM(LLM):
+    """Project base class for custom LLM wrappers; declares the fields shared by the platform-specific subclasses."""
+    # Custom LLM
+    # Inherits from langchain.llms.base.LLM
+    # Native endpoint URL
+    url : str =  None
+    # Defaults to the GPT-3.5 model, i.e. what is commonly just called GPT
+    model_name: str = "gpt-3.5-turbo"
+    # Upper bound on request latency
+    request_timeout: float = None
+    # Temperature coefficient
+    temperature: float = 0.1
+    # API_Key
+    api_key: str = None
+    # Required holder for optional extra parameters
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    # Property returning the default call parameters
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Return the default call parameters: temperature and request_timeout."""
+        normal_params = {
+            "temperature": self.temperature,
+            "request_timeout": self.request_timeout,
+            }
+        # print(type(self.model_kwargs))
+        # Note: model_kwargs is not merged into the returned dict
+        return {**normal_params}
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters: model_name plus the default parameters."""
+        return {**{"model_name": self.model_name}, **self._default_params}

llm/spark_llm.py ADDED Viewed

	@@ -0,0 +1,227 @@

+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@File    :   spark_llm.py
+@Time    :   2023/10/16 18:53:26
+@Author  :   Logan Zou
+@Version :   1.0
+@Contact :   loganzou0421@163.com
+@License :   (C)Copyright 2017-2018, Liugroup-NLPR-CASIA
+@Desc    :   基于讯飞星火大模型自定义 LLM 类
+'''
+from langchain.llms.base import LLM
+from typing import Any, List, Mapping, Optional, Dict, Union, Tuple
+from pydantic import Field
+from llm.self_llm import Self_LLM
+import json
+import requests
+from langchain.callbacks.manager import CallbackManagerForLLMRun
+import _thread as thread
+import base64
+import datetime
+import hashlib
+import hmac
+import json
+from urllib.parse import urlparse
+import ssl
+from datetime import datetime
+from time import mktime
+from urllib.parse import urlencode
+from wsgiref.handlers import format_date_time
+import websocket  # 使用websocket_client
+import queue
+class Spark_LLM(Self_LLM):
+    # 讯飞星火大模型的自定义 LLM
+    # URL
+    url : str = "ws://spark-api.xf-yun.com/v1.1/chat"
+    # APPID
+    appid : str = None
+    # APISecret
+    api_secret : str = None
+    # Domain
+    domain :str = "general"
+    # max_token
+    max_tokens : int = 4096
+    def getText(self, role, content, text = []):
+        # role 是指定角色，content 是 prompt 内容
+        jsoncon = {}
+        jsoncon["role"] = role
+        jsoncon["content"] = content
+        text.append(jsoncon)
+        return text
+    def _call(self, prompt : str, stop: Optional[List[str]] = None,
+                run_manager: Optional[CallbackManagerForLLMRun] = None,
+                **kwargs: Any):
+        if self.api_key == None or self.appid == None or self.api_secret == None:
+            # 三个 Key 均存在才可以正常调用
+            print("请填入 Key")
+            raise ValueError("Key 不存在")
+        # 将 Prompt 填充到星火格式
+        question = self.getText("user", prompt)
+        # 发起请求
+        try:
+            response = spark_main(self.appid,self.api_key,self.api_secret,self.url,self.domain,question, self.temperature, self.max_tokens)
+            return response
+        except Exception as e:
+            print(e)
+            print("请求失败")
+            return "请求失败"
+    @property
+    def _llm_type(self) -> str:
+        return "Spark"
+answer = ""  # module-level accumulator; the module-level on_message appends streamed content to it
+class Ws_Param(object):
+    """Holds the Spark credentials and endpoint parts and builds the signed websocket URL."""
+    # Initialization
+    def __init__(self, APPID, APIKey, APISecret, Spark_url):
+        self.APPID = APPID
+        self.APIKey = APIKey
+        self.APISecret = APISecret
+        self.host = urlparse(Spark_url).netloc
+        self.path = urlparse(Spark_url).path
+        self.Spark_url = Spark_url
+        # Custom settings (not read by create_url)
+        self.temperature = 0
+        self.max_tokens = 2048
+    # Generate the URL
+    def create_url(self):
+        """Return Spark_url with the authorization, date and host query parameters appended."""
+        # Generate an RFC 1123 formatted timestamp
+        now = datetime.now()
+        date = format_date_time(mktime(now.timetuple()))
+        # Assemble the string to be signed: host line, date line, request line
+        signature_origin = "host: " + self.host + "\n"
+        signature_origin += "date: " + date + "\n"
+        signature_origin += "GET " + self.path + " HTTP/1.1"
+        # Sign it with HMAC-SHA256 keyed by the API secret
+        signature_sha = hmac.new(self.APISecret.encode('utf-8'), signature_origin.encode('utf-8'),
+                                 digestmod=hashlib.sha256).digest()
+        signature_sha_base64 = base64.b64encode(signature_sha).decode(encoding='utf-8')
+        authorization_origin = f'api_key="{self.APIKey}", algorithm="hmac-sha256", headers="host date request-line", signature="{signature_sha_base64}"'
+        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')
+        # Collect the authentication parameters into a dict
+        v = {
+            "authorization": authorization,
+            "date": date,
+            "host": self.host
+        }
+        # Append the authentication parameters to produce the final URL
+        url = self.Spark_url + '?' + urlencode(v)
+        # The demo this is based on could print the connection URL here, to compare it with a URL generated by your own code from the same parameters
+        return url
+# Handler for websocket errors
+def on_error(ws, error):
+    """Print the error reported for the websocket connection."""
+    print("### error:", error)
+# Handler for websocket close
+def on_close(ws,one,two):
+    """Print a single space when the connection closes; `one` and `two` are ignored."""
+    print(" ")
+# Handler for websocket connection established
+def on_open(ws):
+    """Start a new thread that runs `run(ws)` to send the request."""
+    thread.start_new_thread(run, (ws,))
+def run(ws, *args):
+    """Serialize the request built by gen_params() from the attributes stored on `ws` and send it."""
+    data = json.dumps(gen_params(appid=ws.appid, domain= ws.domain,question=ws.question, temperature = ws.temperature, max_tokens = ws.max_tokens))
+    ws.send(data)
+# Handler for received websocket messages
+def on_message(ws, message):
+    """
+    Parse one message, print its content and append it to the global `answer`.
+
+    Closes the connection on a non-zero header code or when status equals 2.
+    spark_main() passes its own nested handler rather than this function.
+    """
+    # print(message)
+    data = json.loads(message)
+    code = data['header']['code']
+    if code != 0:
+        print(f'请求错误: {code}, {data}')
+        ws.close()
+    else:
+        choices = data["payload"]["choices"]
+        status = choices["status"]
+        content = choices["text"][0]["content"]
+        print(content,end ="")
+        global answer
+        answer += content
+        # print(1)
+        # presumably status 2 marks the final chunk -- TODO confirm against the Spark API docs
+        if status == 2:
+            ws.close()
+def gen_params(appid, domain,question, temperature, max_tokens):
+    """
+    Build the request payload dict from the app id and the user's question.
+
+    Args:
+        appid: value stored under header.app_id.
+        domain: value stored under parameter.chat.domain.
+        question: value stored under payload.message.text.
+        temperature: value stored under parameter.chat.temperature.
+        max_tokens: value stored under parameter.chat.max_tokens.
+
+    Returns:
+        A dict with "header", "parameter" and "payload" sections.
+    """
+    data = {
+        "header": {
+            "app_id": appid,
+            "uid": "1234"  # hard-coded user id
+        },
+        "parameter": {
+            "chat": {
+                "domain": domain,
+                "random_threshold": 0.5,  # fixed value, not configurable by callers
+                "max_tokens": max_tokens,
+                "temperature" : temperature,
+                "auditing": "default"
+            }
+        },
+        "payload": {
+            "message": {
+                "text": question
+            }
+        }
+    }
+    return data
+def spark_main(appid, api_key, api_secret, Spark_url,domain, question, temperature, max_tokens):
+    """
+    Send one question over a websocket connection and return the collected reply text.
+
+    Builds a signed URL with Ws_Param.create_url(), creates a websocket.WebSocketApp
+    on it, attaches the request parameters to the app object as attributes (read
+    back by `run`) and calls run_forever(). The nested on_message handler queues
+    each message's content; the queued pieces are joined in arrival order after
+    run_forever() returns.
+
+    Returns:
+        The concatenated content strings, or an empty string if nothing was queued
+        (for example when the first message carried a non-zero header code).
+    """
+    # print("Spark:")
+    output_queue = queue.Queue()
+    def on_message(ws, message):
+        data = json.loads(message)
+        code = data['header']['code']
+        if code != 0:
+            # Non-zero header code is treated as a request error: report it and close
+            print(f'请求错误: {code}, {data}')
+            ws.close()
+        else:
+            choices = data["payload"]["choices"]
+            status = choices["status"]
+            content = choices["text"][0]["content"]
+            # print(content, end='')
+            # Put the output chunk into the queue
+            output_queue.put(content)
+            # presumably status 2 marks the final chunk -- TODO confirm against the Spark API docs
+            if status == 2:
+                ws.close()
+    wsParam = Ws_Param(appid, api_key, api_secret, Spark_url)
+    websocket.enableTrace(False)
+    wsUrl = wsParam.create_url()
+    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close, on_open=on_open)
+    # Stash the request parameters on the app object so that `run` can read them
+    ws.appid = appid
+    ws.question = question
+    ws.domain = domain
+    ws.temperature = temperature
+    ws.max_tokens = max_tokens
+    # NOTE(review): cert_reqs=ssl.CERT_NONE turns off TLS certificate verification -- confirm this is intended
+    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
+    return ''.join([output_queue.get() for _ in range(output_queue.qsize())])

llm/test.ipynb ADDED Viewed

	@@ -0,0 +1,312 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from wenxin_llm import Wenxin_LLM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import find_dotenv, load_dotenv\n",
+    "import os\n",
+    "\n",
+    "# 读取本地/项目的环境变量。\n",
+    "\n",
+    "# find_dotenv()寻找并定位.env文件的路径\n",
+    "# load_dotenv()读取该.env文件，并将其中的环境变量加载到当前的运行环境中\n",
+    "# 如果你设置的是全局的环境变量，这行代码则没有任何作用。\n",
+    "_ = load_dotenv(find_dotenv())\n",
+    "\n",
+    "# 获取环境变量 OPENAI_API_KEY\n",
+    "wenxin_api_key = os.environ[\"wenxin_api_key\"]\n",
+    "wenxin_secret_key = os.environ[\"wenxin_secret_key\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = Wenxin_LLM(model = \"ERNIE-Bot-turbo\", api_key=wenxin_api_key, secret_key=wenxin_secret_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'您好，我是百度研发的知识增强大语言模型，中文名是文心一言，英文名是ERNIE Bot。我能够与人对话互动，回答问题，协助创作，高效便捷地帮助人们获取信息、知识和灵感。\\n\\n如果您有任何问题，请随时告诉我。'"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "llm(\"你是谁\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from spark_llm import Spark_LLM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import find_dotenv, load_dotenv\n",
+    "import os\n",
+    "\n",
+    "# 读取本地/项目的环境变量。\n",
+    "\n",
+    "# find_dotenv()寻找并定位.env文件的路径\n",
+    "# load_dotenv()读取该.env文件，并将其中的环境变量加载到当前的运行环境中\n",
+    "# 如果你设置的是全局的环境变量，这行代码则没有任何作用。\n",
+    "_ = load_dotenv(find_dotenv())\n",
+    "#填写控制台中获取的 APPID 信息\n",
+    "appid = os.environ[\"spark_appid\"]\n",
+    "#填写控制台中获取的 APISecret 信息\n",
+    "api_secret = os.environ[\"spark_api_secret\"]\n",
+    "#填写控制台中获取的 APIKey 信息\n",
+    "api_key = os.environ[\"spark_api_key\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = Spark_LLM(model = \"spark\", appid=appid, api_secret=api_secret, api_key=api_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'\\n\\n我是一个AI语言模型，可以回答你的问题和提供帮助。'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "llm(\"你是谁\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from zhipuai_llm import ZhipuAILLM\n",
+    "\n",
+    "from dotenv import find_dotenv, load_dotenv\n",
+    "import os\n",
+    "\n",
+    "# 读取本地/项目的环境变量。\n",
+    "\n",
+    "# find_dotenv()寻找并定位.env文件的路径\n",
+    "# load_dotenv()读取该.env文件，并将其中的环境变量加载到当前的运行环境中\n",
+    "# 如果你设置的是全局的环境变量，这行代码则没有任何作用。\n",
+    "_ = load_dotenv(find_dotenv())\n",
+    "\n",
+    "api_key = os.environ[\"ZHIPUAI_API_KEY\"]    #填写控制台中获取的 APIKey 信息"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'我是一个名为 ChatGLM 的人工智能助手，由智谱 AI 公司于2023年训练的语言模型开发而成。我的任务是针对用户的问题和要求提供适当的答复和支持。'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "llm = ZhipuAILLM(model=\"chatglm_pro\", zhipuai_api_key=api_key, temperature=0.1)\n",
+    "llm(\"你是谁\") "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "测试原生接口"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from call_llm import get_completion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import find_dotenv, load_dotenv\n",
+    "import os\n",
+    "\n",
+    "# 读取本地/项目的环境变量。\n",
+    "\n",
+    "# find_dotenv()寻找并定位.env文件的路径\n",
+    "# load_dotenv()读取该.env文件，并将其中的环境变量加载到当前的运行环境中\n",
+    "# 如果你设置的是全局的环境变量，这行代码则没有任何作用。\n",
+    "_ = load_dotenv(find_dotenv())\n",
+    "\n",
+    "# 获取环境变量 OPENAI_API_KEY\n",
+    "openai_api_key = os.environ[\"OPENAI_API_KEY\"]\n",
+    "wenxin_api_key = os.environ[\"wenxin_api_key\"]\n",
+    "wenxin_secret_key = os.environ[\"wenxin_secret_key\"]\n",
+    "spark_appid = os.environ[\"spark_appid\"]\n",
+    "spark_api_secret = os.environ[\"spark_api_secret\"]\n",
+    "spark_api_key = os.environ[\"spark_api_key\"]\n",
+    "zhipu_api_key = os.environ[\"ZHIPUAI_API_KEY\"]\n",
+    "\n",
+    "# os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:7890'\n",
+    "# os.environ[\"HTTP_PROXY\"] = 'http://127.0.0.1:7890'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'我是一个人工智能助手，可以回答你的问题并提供帮助。有什么可以帮到你的吗？'"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "get_completion(\"你是谁\",model=\"gpt-3.5-turbo\", api_key=openai_api_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'您好，我是百度研发的知识增强大语言模型，中文名是文心一言，英文名是ERNIE Bot。我能够与人对话互动，回答问题，协助创作，高效便捷地帮助人们获取信息、知识和灵感。'"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "get_completion(\"你是谁\",model=\"ERNIE-Bot-turbo\", api_key=wenxin_api_key, secret_key=wenxin_secret_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'\\n\\n我是一个AI语言模型，可以回答你的问题和提供帮助。'"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "get_completion(\"你是谁\",model=\"Spark-1.5\", appid=spark_appid, api_key=spark_api_key, api_secret=spark_api_secret)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'我是一个名为 ChatGLM 的人工智能助手，由智谱 AI 公司于2023年训练的语言模型开发而成。我的任务是针对用户的问题和要求提供适当的答复和支持。'"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "get_completion(\"你是谁\",model=\"chatglm_std\", api_key=zhipu_api_key)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "langchain",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

llm/wenxin_llm.py ADDED Viewed

	@@ -0,0 +1,90 @@

+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@File    :   wenxin_llm.py
+@Time    :   2023/10/16 18:53:26
+@Author  :   Logan Zou
+@Version :   1.0
+@Contact :   loganzou0421@163.com
+@License :   (C)Copyright 2017-2018, Liugroup-NLPR-CASIA
+@Desc    :   基于百度文心大模型自定义 LLM 类
+'''
+from langchain.llms.base import LLM
+from typing import Any, List, Mapping, Optional, Dict, Union, Tuple
+from pydantic import Field
+from llm.self_llm import Self_LLM
+import json
+import requests
+from langchain.callbacks.manager import CallbackManagerForLLMRun
+# Helper for calling the Wenxin API
+def get_access_token(api_key : str, secret_key : str):
+    """
+    Exchange an API Key and Secret Key for an access_token.
+
+    Args:
+        api_key: the application's API Key, sent as client_id.
+        secret_key: the application's Secret Key, sent as client_secret.
+
+    Returns:
+        The "access_token" field of the JSON response, or None if the
+        response has no such field.
+    """
+    # Target URL (credentials are passed as query parameters)
+    url = f"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={api_key}&client_secret={secret_key}"
+    # Set up the POST request
+    payload = json.dumps("")
+    headers = {
+        'Content-Type': 'application/json',
+        'Accept': 'application/json'
+    }
+    # POST to obtain the access_token for this account (no timeout is passed)
+    response = requests.request("POST", url, headers=headers, data=payload)
+    return response.json().get("access_token")
+class Wenxin_LLM(Self_LLM):
+    """Custom LLM for the Baidu Wenxin model; posts each prompt to the configured chat URL."""
+    # URL
+    url : str = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant?access_token={}"
+    # Secret_Key
+    secret_key : str = None
+    # access_token
+    access_token: str = None
+    def init_access_token(self):
+        """Fetch and store access_token when both keys are set; print a message otherwise or on failure."""
+        if self.api_key != None and self.secret_key != None:
+            # Both keys must be non-empty to obtain an access_token
+            try:
+                self.access_token = get_access_token(self.api_key, self.secret_key)
+            except Exception as e:
+                print(e)
+                print("获取 access_token 失败，请检查 Key")
+        else:
+            print("API_Key 或 Secret_Key 为空，请检查 Key")
+    def _call(self, prompt : str, stop: Optional[List[str]] = None,
+                run_manager: Optional[CallbackManagerForLLMRun] = None,
+                **kwargs: Any):
+        """
+        Post `prompt` as a single user message and return the reply.
+
+        `stop`, `run_manager` and `kwargs` are accepted but not used here.
+
+        Returns:
+            The "result" field of the JSON response when the HTTP status is 200,
+            otherwise the string "请求失败".
+        """
+        # Initialize access_token if it has not been set yet
+        if self.access_token == None:
+            self.init_access_token()
+        # URL for the API call; if the token fetch failed, access_token is still None here
+        url = self.url.format(self.access_token)
+        # Build the POST body
+        payload = json.dumps({
+            "messages": [
+                {
+                    "role": "user",# user prompt
+                    "content": "{}".format(prompt)# the input prompt
+                }
+            ],
+            'temperature' : self.temperature
+        })
+        headers = {
+            'Content-Type': 'application/json'
+        }
+        # Issue the request
+        response = requests.request("POST", url, headers=headers, data=payload, timeout=self.request_timeout)
+        if response.status_code == 200:
+            # The body is a JSON string
+            js = json.loads(response.text)
+            # print(js)
+            # NOTE(review): raises KeyError if a 200 response has no "result" field -- confirm the API's error format
+            return js["result"]
+        else:
+            return "请求失败"
+    @property
+    def _llm_type(self) -> str:
+        """Return the identifier of this LLM type."""
+        return "Wenxin"

llm/zhipuai_llm.py ADDED Viewed

	@@ -0,0 +1,217 @@

+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@File    :   zhipuai_llm.py
+@Time    :   2023/10/16 22:06:26
+@Author  :   0-yy-0
+@Version :   1.0
+@Contact :   310484121@qq.com
+@License :   (C)Copyright 2017-2018, Liugroup-NLPR-CASIA
+@Desc    :   基于智谱 AI 大模型自定义 LLM 类
+'''
+from __future__ import annotations
+import logging
+from typing import (
+    Any,
+    AsyncIterator,
+    Dict,
+    Iterator,
+    List,
+    Optional,
+)
+from langchain.callbacks.manager import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
+from langchain.llms.base import LLM
+from langchain.pydantic_v1 import Field, root_validator
+from langchain.schema.output import GenerationChunk
+from langchain.utils import get_from_dict_or_env
+from llm.self_llm import Self_LLM
+logger = logging.getLogger(__name__)
+class ZhipuAILLM(Self_LLM):
+    """Zhipuai hosted open source or customized models.
+    To use, you should have the ``zhipuai`` python package installed, and
+    the environment variable ``zhipuai_api_key`` set with
+    your API key and Secret Key.
+    zhipuai_api_key are required parameters which you could get from
+    https://open.bigmodel.cn/usercenter/apikeys
+    Example:
+        .. code-block:: python
+            from langchain.llms import ZhipuAILLM
+            zhipuai_model = ZhipuAILLM(model="chatglm_std", temperature=temperature)
+    """
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    client: Any
+    model: str = "chatglm_std"
+    """Model name in chatglm_pro, chatglm_std, chatglm_lite. """
+    zhipuai_api_key: Optional[str] = None
+    incremental: Optional[bool] = True
+    """Whether to incremental the results or not."""
+    streaming: Optional[bool] = False
+    """Whether to streaming the results or not."""
+    # streaming = -incremental
+    request_timeout: Optional[int] = 60
+    """request timeout for chat http requests"""
+    top_p: Optional[float] = 0.8
+    temperature: Optional[float] = 0.95
+    request_id: Optional[float] = None
+    @root_validator()
+    def validate_enviroment(cls, values: Dict) -> Dict:
+        values["zhipuai_api_key"] = get_from_dict_or_env(
+            values,
+            "zhipuai_api_key",
+            "ZHIPUAI_API_KEY",
+        )
+        params = {
+            "zhipuai_api_key": values["zhipuai_api_key"],
+            "model": values["model"],
+        }
+        try:
+            import zhipuai
+            zhipuai.api_key = values["zhipuai_api_key"]
+            values["client"] = zhipuai.model_api
+        except ImportError:
+            raise ValueError(
+                "zhipuai package not found, please install it with "
+                "`pip install zhipuai`"
+            )
+        return values
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        return {
+            **{"model": self.model},
+            **super()._identifying_params,
+        }
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "zhipuai"
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling OpenAI API."""
+        normal_params = {
+            "streaming": self.streaming,
+            "top_p": self.top_p,
+            "temperature": self.temperature,
+            "request_id": self.request_id,
+        }
+        return {**normal_params, **self.model_kwargs}
+    def _convert_prompt_msg_params(
+        self,
+        prompt: str,
+        **kwargs: Any,
+    ) -> dict:
+        return {
+            **{"prompt": prompt, "model": self.model},
+            **self._default_params,
+            **kwargs,
+        }
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Call out to an zhipuai models endpoint for each generation with a prompt.
+        Args:
+            prompt: The prompt to pass into the model.
+        Returns:
+            The string generated by the model.
+        Example:
+            .. code-block:: python
+                response = zhipuai_model("Tell me a joke.")
+        """
+        if self.streaming:
+            completion = ""
+            for chunk in self._stream(prompt, stop, run_manager, **kwargs):
+                completion += chunk.text
+            return completion
+        params = self._convert_prompt_msg_params(prompt, **kwargs)
+        response_payload = self.client.invoke(**params)
+        return response_payload["data"]["choices"][-1]["content"].strip('"').strip(" ")
+    async def _acall(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        if self.streaming:
+            completion = ""
+            async for chunk in self._astream(prompt, stop, run_manager, **kwargs):
+                completion += chunk.text
+            return completion
+        params = self._convert_prompt_msg_params(prompt, **kwargs)
+        response = await self.client.async_invoke(**params)
+        return response_payload
+    def _stream(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> Iterator[GenerationChunk]:
+        params = self._convert_prompt_msg_params(prompt, **kwargs)
+        for res in self.client.invoke(**params):
+            if res:
+                chunk = GenerationChunk(text=res)
+                yield chunk
+                if run_manager:
+                    run_manager.on_llm_new_token(chunk.text)
+    async def _astream(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[GenerationChunk]:
+        params = self._convert_prompt_msg_params(prompt, **kwargs)
+        async for res in await self.client.ado(**params):
+            if res:
+                chunk = GenerationChunk(text=res["data"]["choices"]["content"])
+                yield chunk
+                if run_manager:
+                    await run_manager.on_llm_new_token(chunk.text)

prompts.py ADDED Viewed

	@@ -0,0 +1,84 @@

+"""Define some useful prompts."""
+trans_prompt = """
+    Please translate the word into chinese: {} """
+query_prompt = """
+    Please translate the word into chinese \
+        and give english example sentence and sentence meaning in Chinese:
+    {} """
+learn_prompt = """
+    Please give an english sentence contains these words: {} \
+        and give sentence meaning in Chinese.
+    """
+wash_prompt = """
+    Please help me to remove the duplicate words and meaningless words from
+    the words list: {}  and return the result.
+    For example:
+    User: apple, red, green, yellow, red, blll><, :*&
+    Your return: apple, red, green, yellow
+"""
+#*******************************************************
+#************** user prompts ***************************
+#*******************************************************
+user_message_mapper = """
+        Please classify the following categories and return the name of the category. If your think user's requirement does not belong to the following categories, return the number 0.
+        Here are four examples:
+        User: I want to know the meaning of the word "apple"
+    """
+#*******************************************************
+#************** system prompts *************************
+#*******************************************************
+# Use few-shot learning to map the user's requirement to a command
+system_message_mapper = """
+        You will receive the user's requirement. \
+        Please classify the user's requirement into the following categories \
+        and return the name of the category.
+        If your think user's requirement does not belong to the \
+        following categories, return the number 0.
+        Here are four examples:
+        User: I want to know the meaning of the word "apple"
+        Your return: :query apple
+        User: I want to add the word "apple" to my dictionary
+        Your return: :add apple
+        User: I want to remove the word "apple" from my dictionary
+        Your return: :remove apple
+        User: I want to learn the word "apple" based words in my dictionary now
+        Your return: :learn
+        User: I already learnt the meaning of the word "apple"
+        Your return: :remove apple
+        User: I want to know the rest words in my dictionary
+        Your return: :show
+        Here are the meaning of four categories for you to reference:
+        :query word means user want to study the meaning of word
+        :add word means user want to add word into his dictionary
+        :remove word means user want to remove word from his dictionary
+        :learn word means user want to learn words in his dictionary related to the word
+        :show means user want to show all words in his dictionary
+        """
+# Use few-shot learning to select the important words in a sentence
+system_message_select = """
+    You will receive the english sentence from user, please select the words in the sentence that you think
+    is important for the user to understand the meaning of the sentence.
+    If the sentence does not contain any english words, please just return number 0.
+    For example:
+    User: The apple is red, and it's green.
+    Your return: apple, red
+"""

requirements.txt ADDED Viewed

	@@ -0,0 +1,157 @@

+aiofiles==23.2.1
+aiohttp==3.8.6
+aiosignal==1.3.1
+altair==5.1.2
+annotated-types==0.6.0
+anyio==4.0.0
+asttokens==2.2.1
+async-timeout==4.0.3
+attrs==23.1.0
+backcall==0.2.0
+backoff==2.2.1
+cachetools==5.3.2
+certifi==2023.7.22
+chardet==5.2.0
+charset-normalizer==3.3.2
+chromadb==0.3.29
+click==8.1.7
+clickhouse-connect==0.6.20
+coloredlogs==15.0.1
+comm==0.1.3
+contourpy==1.2.0
+cycler==0.12.1
+dataclasses==0.6
+dataclasses-json==0.5.14
+debugpy==1.6.6
+decorator==5.1.1
+duckdb==0.9.1
+entrypoints==0.4
+exceptiongroup==1.0.4
+executing==1.2.0
+fastapi==0.85.1
+ffmpy==0.3.1
+filelock==3.13.1
+filetype==1.2.0
+flatbuffers==23.5.26
+fonttools==4.44.0
+frozenlist==1.4.0
+fsspec==2023.10.0
+gradio==3.40.1
+gradio_client==0.7.0
+greenlet==3.0.1
+h11==0.14.0
+hnswlib==0.7.0
+httpcore==1.0.2
+httptools==0.6.1
+httpx==0.25.1
+huggingface-hub==0.17.3
+humanfriendly==10.0
+idna==3.4
+importlib-metadata==6.0.0
+importlib-resources==6.1.1
+ipykernel==6.23.1
+ipython==8.10.0
+jedi==0.18.2
+jieba==0.42.1
+Jinja2==3.1.2
+joblib==1.3.2
+jsonschema==4.19.2
+jsonschema-specifications==2023.7.1
+jupyter_client==8.6.0
+jupyter_core==5.3.0
+kiwisolver==1.4.5
+langchain==0.0.292
+langsmith==0.0.63
+linkify-it-py==2.0.2
+lxml==4.9.3
+lz4==4.3.2
+Markdown==3.4.3
+markdown-it-py==2.2.0
+MarkupSafe==2.1.3
+marshmallow==3.20.1
+matplotlib==3.8.1
+matplotlib-inline==0.1.3
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.4
+mypy-extensions==1.0.0
+nest-asyncio==1.5.4
+nltk==3.8.1
+numexpr==2.8.7
+numpy==1.26.2
+onnxruntime==1.16.2
+openai==0.27.6
+orjson==3.9.10
+overrides==7.4.0
+packaging==23.1
+pandas==2.1.3
+parso==0.8.3
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==10.1.0
+pip==23.3
+platformdirs==3.0.0
+posthog==3.0.2
+prompt-toolkit==3.0.38
+protobuf==4.25.0
+psutil==5.9.4
+ptyprocess==0.7.0
+pulsar-client==3.3.0
+pure-eval==0.2.2
+pydantic==1.10.10
+pydantic_core==2.10.1
+pydub==0.25.1
+Pygments==2.14.0
+PyJWT==2.8.0
+PyMuPDF==1.23.6
+PyMuPDFb==1.23.6
+pyparsing==3.1.1
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-magic==0.4.27
+python-multipart==0.0.6
+pytz==2023.3.post1
+PyYAML==6.0.1
+pyzmq==25.1.0
+referencing==0.30.2
+regex==2023.5.5
+requests==2.31.0
+rouge-chinese==1.0.3
+rpds-py==0.12.0
+scikit-learn==1.2.2
+scipy==1.11.2
+semantic-version==2.10.0
+setuptools==68.0.0
+six==1.16.0
+sniffio==1.3.0
+SQLAlchemy==2.0.23
+stack-data==0.6.2
+starlette==0.20.4
+sympy==1.12
+tabulate==0.9.0
+tenacity==8.2.3
+threadpoolctl==3.1.0
+tokenizers==0.14.1
+toolz==0.12.0
+tornado==6.3.3
+tqdm==4.66.1
+traitlets==5.9.0
+typing_extensions==4.7.1
+typing-inspect==0.9.0
+tzdata==2023.3
+uc-micro-py==1.0.2
+unstructured==0.9.0
+urllib3==2.0.7
+uvicorn==0.24.0.post1
+uvloop==0.19.0
+watchfiles==0.21.0
+wcwidth==0.2.5
+websocket-client==1.5.2
+websockets==11.0.3
+wheel==0.41.2
+yarl==1.9.2
+zhipuai==1.0.7
+zipp==3.11.0
+zstandard==0.22.0

test/__pycache__/test_create_db.cpython-311-pytest-8.2.0.pyc ADDED Viewed

Binary file (7.87 kB). View file

test/test_create_db.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from typing import List
+from pytest import fixture
+from create_db import split_text
+@fixture
+def sample_text():
+    """Provide the two sample passages handed to ``split_text`` in the tests."""
+    # Adjacent string literals are joined by Python, so the five Lorem ipsum
+    # pieces below form a single list element.
+    lorem_passage = (
+        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
+        "Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "
+        "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. "
+        "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. "
+        "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
+    )
+    splitting_passage = "Another long text string to demonstrate the splitting functionality. This text should also be split into multiple chunks."
+    return [lorem_passage, splitting_passage]
+def test_split_text(sample_text):
+    """Check the structure, size, content and overlap of ``split_text`` output.
+
+    NOTE(review): ``split_text`` is imported from ``create_db`` and is not
+    visible here; the 1500/150 figures are presumably its chunk size and
+    overlap -- confirm against that module.
+    """
+    chunks = split_text(sample_text)
+    # 1. Each chunk is a list and every item inside it is a string.
+    for chunk in chunks:
+        assert isinstance(chunk, list)
+        for text in chunk:
+            assert isinstance(text, str)
+    # 2. No chunk is empty.
+    for chunk in chunks:
+        assert chunk
+    # 3. The joined length of each chunk lies in [1500 - 150, 1500).
+    # NOTE(review): the sample passages look far shorter than 1350
+    # characters -- verify this bound can actually hold.
+    expected_length = 1500 - 150  # chunk size minus the overlap size
+    for chunk in chunks:
+        joined_length = len(''.join(chunk))
+        assert expected_length <= joined_length < 1500
+    # 4. Every piece of every chunk occurs in the space-joined input.
+    original_text = ' '.join(sample_text)
+    for chunk in chunks:
+        for text in chunk:
+            assert text in original_text
+    # 5. Neighbouring chunks share either nothing or exactly 150 characters.
+    # NOTE(review): chunks are lists, so the slices below pick list items
+    # rather than characters -- confirm this is the intended comparison.
+    for idx in range(1, len(chunks)):
+        tail_items = set(chunks[idx - 1][-150:])
+        head_items = chunks[idx][:150]
+        overlap = ''.join(tail_items.intersection(head_items))
+        assert not overlap or len(overlap) == 150

words_db.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import sqlite3
+from loguru import logger
+DB_PATH = './database/word_database.db'
+class WordsDB(object):
+    """SQLite-backed store of words and their definitions.
+
+    Every public method opens its own connection, runs a single statement
+    and releases the connection again, even when the statement raises.
+
+    Args:
+        db_path: Path of the SQLite database file. When omitted, the
+            module-level ``DB_PATH`` is used.
+    """
+    def __init__(self, db_path=None):
+        # Resolve the default here so callers may point at another file.
+        self.db_path = DB_PATH if db_path is None else db_path
+        logger.info('Initialized words database.')
+    def _connect_db(self):
+        """Open a connection and make sure the ``words`` table exists.
+
+        Returns:
+            A ``(connection, cursor)`` tuple; the caller must close both.
+        """
+        conn = sqlite3.connect(
+            self.db_path,
+            timeout=10,
+            check_same_thread=False)
+        try:
+            # Run the DDL on the connection itself so no extra cursor is
+            # left open behind the one handed back to the caller.
+            conn.execute('''
+        CREATE TABLE IF NOT EXISTS words (
+            id INTEGER PRIMARY KEY,
+            word TEXT NOT NULL,
+            definition TEXT NOT NULL
+        )
+        ''')
+            return conn, conn.cursor()
+        except Exception:
+            # Do not leak the connection when table setup fails.
+            conn.close()
+            raise
+    def _run(self, sql, params=(), *, fetch=False):
+        """Execute one statement on a fresh connection and always release it.
+
+        Args:
+            sql: Statement text using ``?`` placeholders.
+            params: Values bound to the placeholders.
+            fetch: When true, return all result rows instead of committing.
+
+        Returns:
+            The fetched rows when ``fetch`` is true, otherwise ``None``.
+        """
+        conn, cursor = self._connect_db()
+        try:
+            cursor.execute(sql, params)
+            if fetch:
+                return cursor.fetchall()
+            conn.commit()
+            return None
+        finally:
+            cursor.close()
+            conn.close()
+    def add_word(self, word, definition):
+        """Insert a new ``word`` row with the given ``definition``."""
+        self._run('INSERT INTO words (word, definition) VALUES (?, ?)', (word, definition))
+    def delete_word(self, word):
+        """Delete every row whose ``word`` column equals ``word``."""
+        self._run('DELETE FROM words WHERE word = ?', (word,))
+    def update_word(self, word, new_definition):
+        """Set ``new_definition`` on every row whose ``word`` column equals ``word``."""
+        self._run('UPDATE words SET definition = ? WHERE word = ?', (new_definition, word))
+    def query_word(self, word=None):
+        """Return rows of the ``words`` table as ``(id, word, definition)`` tuples.
+
+        Args:
+            word: Optional word to look up. When omitted, every row is
+                returned, matching the original no-argument behaviour.
+
+        Returns:
+            A list of matching rows; empty when nothing matches.
+        """
+        if word is None:
+            return self._run('SELECT * FROM words', fetch=True)
+        return self._run('SELECT * FROM words WHERE word = ?', (word,), fetch=True)
+# Module-level instance, created when this module is imported.
+words_db = WordsDB()
+if __name__ == '__main__':
+    # Manual smoke test: insert two words, update one, then print its rows.
+    words_db.add_word('apple', '苹果')
+    words_db.add_word('banana', '香蕉')
+    words_db.update_word('banana', '新的香蕉')
+    # Fetch all rows and filter here so the lookup does not depend on
+    # query_word accepting a word argument; rows are (id, word, definition).
+    result = [row for row in words_db.query_word() if row[1] == 'banana']
+    print(result)