Spaces:

NCTCMumbai
/

Customs_Manual_Chatbot

Running

App Files Files Community

NCTCMumbai committed on Apr 4

Commit

e34a93e

•

1 Parent(s): 8fe98c8

Upload 7 files

Browse files

Files changed (7) hide show

app.py +160 -0
backend/__pycache__/query_llm.cpython-310.pyc +0 -0
backend/__pycache__/semantic_search.cpython-310.pyc +0 -0
backend/query_llm.py +156 -0
backend/semantic_search.py +19 -0
logo.png +0 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,160 @@

+"""
+Credit to Derek Thomas, derek@huggingface.co
+"""
+import subprocess
+# subprocess.run(["pip", "install", "--upgrade", "transformers[torch,sentencepiece]==4.34.1"])
+import logging
+from pathlib import Path
+from time import perf_counter
+import gradio as gr
+from jinja2 import Environment, FileSystemLoader
+import numpy as np
+from sentence_transformers import CrossEncoder
+from backend.query_llm import generate_hf, generate_openai
+from backend.semantic_search import table, retriever
+# Column of the vector table that bot() searches against.
+VECTOR_COLUMN_NAME = "embeddings"
+# Column of each search hit that holds the passage text bot() feeds to the prompt.
+TEXT_COLUMN_NAME = "text"
+# Directory containing this file; the templates directory is resolved relative to it.
+proj_dir = Path(__file__).parent
+# Setting up the logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Set up the template environment with the templates directory
+# NOTE(review): this commit's file list does not include a templates/ directory -
+# confirm template.j2 and template_html.j2 exist in the repository.
+env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
+# Load the templates directly from the environment:
+# `template` is rendered into the LLM prompt, `template_html` into the prompt preview shown in the UI.
+template = env.get_template('template.j2')
+template_html = env.get_template('template_html.j2')
+# Cross-encoder used by bot() to score (query, passage) pairs when re-ranking search hits.
+cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+# Example questions offered in the UI through gr.Examples.
+examples = ['What is Let Export Order?',
+            'What are benefits of  the AEO Scheme ?',
+            'Which circular talks about EOU?', ]
+def add_text(history, text):
+    """
+    Append the submitted text to the chat history as a new, unanswered turn.
+    Args:
+        history: Existing list of chat turns, or None when the chat is still empty.
+        text: The message the user just submitted.
+    Returns:
+        tuple: The extended history and a cleared, non-interactive Textbox update.
+    """
+    if history is None:
+        history = []
+    pending_turn = (text, None)  # the bot slot stays empty until a reply is generated
+    cleared_box = gr.Textbox(value="", interactive=False)
+    return history + [pending_turn], cleared_box
+def bot(history, api_kind):
+    """
+    Answer the latest user message with retrieved context, streaming the reply.
+    The last entry of ``history`` holds the pending query. Candidate passages are fetched
+    from the vector table, re-ranked with the cross-encoder, rendered into the prompt
+    templates and passed to the selected generation backend.
+    Args:
+        history: Chat history as a list of (user message, bot message) pairs; the bot slot
+            of the last pair is filled in as text is generated.
+        api_kind: Backend selected in the UI, either "HuggingFace" or "OpenAI".
+    Yields:
+        tuple: The updated history and the HTML rendering of the prompt.
+    Raises:
+        ValueError: If the query is empty, or ``api_kind`` is missing or unsupported.
+    """
+    top_rerank = 15  # number of candidates pulled from the vector search
+    top_k_rank = 8   # number of candidates kept after cross-encoder re-ranking
+    query = history[-1][0]
+    if not query:
+        gr.Warning("Please submit a non-empty string as a prompt")
+        raise ValueError("Empty string was submitted")
+    # Resolve the backend before doing any retrieval so that a bad selection fails
+    # immediately instead of after the embedding, search and re-ranking work.
+    if api_kind is None:
+        gr.Warning("API name was not provided")
+        raise ValueError("API name was not provided")
+    generate_fn = {"HuggingFace": generate_hf, "OpenAI": generate_openai}.get(api_kind)
+    if generate_fn is None:
+        gr.Warning(f"API {api_kind} is not supported")
+        raise ValueError(f"API {api_kind} is not supported")
+    logger.warning('Retrieving documents...')
+    # Retrieve documents relevant to query
+    document_start = perf_counter()
+    query_vec = retriever.encode(query)
+    logger.warning('Finished query vec')
+    hits = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_rerank).to_list()
+    logger.warning('Finished search')
+    documents = [hit[TEXT_COLUMN_NAME] for hit in hits]
+    logger.warning('start cross encoder %d', len(documents))
+    # Re-rank the candidates: score every (query, passage) pair and keep the best ones
+    cross_scores = cross_encoder.predict([[query, doc] for doc in documents])
+    best_first = np.argsort(cross_scores)[::-1]  # argsort is ascending, so reverse it
+    logger.warning('Finished cross encoder %d', len(documents))
+    documents = [documents[idx] for idx in best_first[:top_k_rank]]
+    logger.warning('num documents %d', len(documents))
+    document_time = perf_counter() - document_start
+    logger.warning('Finished Retrieving documents in %s seconds...', round(document_time, 2))
+    # Create Prompt
+    prompt = template.render(documents=documents, query=query)
+    prompt_html = template_html.render(documents=documents, query=query)
+    # add_text() appends the pending turn as a tuple; replace it with a list so the
+    # bot slot can be updated in place regardless of how the history was passed in.
+    history[-1] = [query, ""]
+    for partial_reply in generate_fn(prompt, history[:-1]):
+        # Each item is the full text generated so far, not a delta.
+        history[-1][1] = partial_reply
+        yield history, prompt_html
+# Build the chat UI: heading, description, chat window, input row, backend selector
+# and a panel that shows the rendered prompt.
+with gr.Blocks() as demo:
+    # Heading with logo
+    # NOTE(review): the <img> uses the bare relative path 'logo.png'; confirm Gradio
+    # actually serves the file at that URL, otherwise the logo will not render.
+    gr.HTML(value="""
+    <div style="display: flex; align-items: center; justify-content: space-between;">
+      <h1 style="color: #2ECC71">Customs Manual Chatbot</h1>
+      <img src='logo.png' alt="Chatbot" width="50" height="50" />
+    </div>
+    """, elem_id="heading")
+    # Formatted description
+    gr.HTML(value="""<p style="font-family: sans-serif; font-size: 16px;">A free chat bot developed by National Customs   		Targeting Center  using Open source LLMs.</p>""", elem_id="description")
+    # Chat window; the two avatar images are given as (user, bot).
+    chatbot = gr.Chatbot(
+      [],
+      elem_id="chatbot",
+      avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
+                      'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
+      bubble_full_width=False,
+      show_copy_button=True,
+      show_share_button=True,
+      )
+    # Input row: free-text box plus an explicit submit button
+    with gr.Row():
+        txt = gr.Textbox(
+                scale=3,
+                show_label=False,
+                placeholder="Enter text and press enter",
+                container=False,
+                )
+        txt_btn = gr.Button(value="Submit text", scale=1)
+    # Backend selector passed to bot() as api_kind; defaults to HuggingFace
+    api_kind = gr.Radio(choices=["HuggingFace", "OpenAI"], value="HuggingFace")
+    # Receives the HTML rendering of the prompt that bot() yields
+    prompt_html = gr.HTML()
+    # Both triggers below share the same chain: add_text() records the message and
+    # disables the textbox, bot() streams the answer, then the textbox is re-enabled.
+    # Turn off interactivity while generating if you click
+    txt_msg = txt_btn.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
+            bot, [chatbot, api_kind], [chatbot, prompt_html])
+    # Turn it back on
+    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
+    # Turn off interactivity while generating if you hit enter
+    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
+            bot, [chatbot, api_kind], [chatbot, prompt_html])
+    # Turn it back on
+    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
+    # Clicking an example fills the textbox with it
+    gr.Examples(examples, txt)
+# Enable queuing, then start the app; both run at import time of this module.
+demo.queue()
+demo.launch(debug=True)

backend/__pycache__/query_llm.cpython-310.pyc ADDED Viewed

Binary file (4.36 kB). View file

backend/__pycache__/semantic_search.cpython-310.pyc ADDED Viewed

Binary file (700 Bytes). View file

backend/query_llm.py ADDED Viewed

	@@ -0,0 +1,156 @@

+import openai
+import gradio as gr
+from os import getenv
+from typing import Any, Dict, Generator, List
+from huggingface_hub import InferenceClient
+from transformers import AutoTokenizer
+# Tokenizer whose chat template format_prompt() applies for the "hf" backend.
+tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
+# NOTE(review): these three module-level sampling values are not read by the functions
+# in this module, which take same-named parameters with their own defaults - confirm
+# whether they are still needed.
+temperature = 0.9
+top_p = 0.6
+repetition_penalty = 1.2
+# Credentials are read from the environment.
+# NOTE(review): OPENAI_KEY is not referenced again in this module; presumably the openai
+# package reads OPENAI_API_KEY from the environment on its own - confirm.
+OPENAI_KEY = getenv("OPENAI_API_KEY")
+HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
+# Inference client used by generate_hf() to stream completions.
+hf_client = InferenceClient(
+        "mistralai/Mistral-7B-Instruct-v0.1",
+        token=HF_TOKEN
+        )
+def format_prompt(message: str, api_kind: str):
+    """
+    Wraps the given message as a single user turn in the shape the chosen backend expects.
+    Args:
+        message (str): The user message to be formatted.
+        api_kind (str): Target backend, either "openai" or "hf".
+    Returns:
+        list | str: For "openai", the list of message dictionaries as-is; for "hf", the
+                    string produced by applying the tokenizer's chat template.
+    Raises:
+        ValueError: If ``api_kind`` is anything other than "openai" or "hf".
+    """
+    # Create a list of message dictionaries with role and content
+    messages: List[Dict[str, Any]] = [{'role': 'user', 'content': message}]
+    if api_kind == "openai":
+        return messages
+    if api_kind == "hf":
+        return tokenizer.apply_chat_template(messages, tokenize=False)
+    # Reject every other value, including None and "", instead of silently returning
+    # None and letting the failure surface later inside the client call.
+    raise ValueError("API is not supported")
+def generate_hf(prompt: str, history: List[Any], temperature: float = 0.9, max_new_tokens: int = 256,
+             top_p: float = 0.95, repetition_penalty: float = 1.0) -> Generator[str, None, str]:
+    """
+    Stream a completion for the given prompt from the Hugging Face inference client.
+    Args:
+        prompt (str): The prompt for the text generation.
+        history (list): Earlier chat turns. Accepted for interface parity with
+            generate_openai; it is not used when building the request.
+        temperature (float, optional): The softmax temperature for sampling. Defaults to 0.9.
+        max_new_tokens (int, optional): Maximum number of tokens to be generated. Defaults to 256.
+        top_p (float, optional): Nucleus sampling probability. Defaults to 0.95.
+        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
+    Yields:
+        str: The full text generated so far, growing with every streamed token. If the
+             request fails, a single user-facing error message is yielded instead.
+    Returns:
+        str: The same error message, only when an error occurred.
+    """
+    temperature = max(float(temperature), 1e-2)  # Ensure temperature isn't too low
+    top_p = float(top_p)
+    generate_kwargs = {
+        'temperature': temperature,
+        'max_new_tokens': max_new_tokens,
+        'top_p': top_p,
+        'repetition_penalty': repetition_penalty,
+        'do_sample': True,
+        'seed': 42,
+        }
+    formatted_prompt = format_prompt(prompt, "hf")
+    try:
+        stream = hf_client.text_generation(formatted_prompt, **generate_kwargs,
+                                            stream=True, details=True, return_full_text=False)
+        output = ""
+        for response in stream:
+            output += response.token.text
+            yield output
+    except Exception as e:
+        error_text = str(e)
+        if "Too Many Requests" in error_text:
+            print("ERROR: Too many requests on Mistral client")
+            gr.Warning("Unfortunately Mistral is unable to process")
+            fallback = "Unfortunately, I am not able to process your request now."
+        elif "Authorization header is invalid" in error_text:
+            print("Authetification error:", error_text)
+            gr.Warning("Authentication error: HF token was either not provided or incorrect")
+            fallback = "Authentication error"
+        else:
+            print("Unhandled Exception:", error_text)
+            gr.Warning("Unfortunately Mistral is unable to process")
+            fallback = "I do not know what happened, but I couldn't understand you."
+        # A generator's `return` value travels on StopIteration and is discarded by a
+        # plain `for` loop, so the message must be yielded for the chat to display it.
+        yield fallback
+        return fallback
+def generate_openai(prompt: str, history: List[Any], temperature: float = 0.9, max_new_tokens: int = 256,
+             top_p: float = 0.95, repetition_penalty: float = 1.0) -> Generator[str, None, str]:
+    """
+    Stream a completion for the given prompt from the OpenAI chat completion API.
+    Args:
+        prompt (str): The prompt for the text generation.
+        history (list): Earlier chat turns. Accepted for interface parity with
+            generate_hf; it is not used when building the request.
+        temperature (float, optional): The softmax temperature for sampling. Defaults to 0.9.
+        max_new_tokens (int, optional): Maximum number of tokens to be generated, sent as
+            ``max_tokens``. Defaults to 256.
+        top_p (float, optional): Nucleus sampling probability. Defaults to 0.95.
+        repetition_penalty (float, optional): Clamped to [-2, 2] and sent as
+            ``frequency_penalty``. Defaults to 1.0.
+    Yields:
+        str: The full text generated so far, growing with every streamed chunk. If the
+             request fails, a single user-facing error message is yielded instead.
+    Returns:
+        str: The same error message, only when an error occurred.
+    """
+    temperature = max(float(temperature), 1e-2)  # Ensure temperature isn't too low
+    top_p = float(top_p)
+    # NOTE(review): the default repetition_penalty of 1.0 is forwarded unchanged as a
+    # frequency_penalty of 1.0 - confirm that this mapping is the intended one.
+    generate_kwargs = {
+        'temperature': temperature,
+        'max_tokens': max_new_tokens,
+        'top_p': top_p,
+        'frequency_penalty': max(-2., min(repetition_penalty, 2.)),
+        }
+    formatted_prompt = format_prompt(prompt, "openai")
+    try:
+        stream = openai.ChatCompletion.create(model="gpt-3.5-turbo-0301",
+                                                messages=formatted_prompt,
+                                                **generate_kwargs,
+                                                stream=True)
+        output = ""
+        for chunk in stream:
+            # Chunks without a "content" field contribute nothing to the text.
+            output += chunk.choices[0].delta.get("content", "")
+            yield output
+    except Exception as e:
+        error_text = str(e)
+        if "Too Many Requests" in error_text:
+            print("ERROR: Too many requests on OpenAI client")
+            gr.Warning("Unfortunately OpenAI is unable to process")
+            fallback = "Unfortunately, I am not able to process your request now."
+        elif "You didn't provide an API key" in error_text:
+            print("Authetification error:", error_text)
+            gr.Warning("Authentication error: OpenAI key was either not provided or incorrect")
+            fallback = "Authentication error"
+        else:
+            print("Unhandled Exception:", error_text)
+            gr.Warning("Unfortunately OpenAI is unable to process")
+            fallback = "I do not know what happened, but I couldn't understand you."
+        # A generator's `return` value travels on StopIteration and is discarded by a
+        # plain `for` loop, so the message must be yielded for the chat to display it.
+        yield fallback
+        return fallback

backend/semantic_search.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import logging
+import lancedb
+import os
+from pathlib import Path
+from sentence_transformers import SentenceTransformer
+# Sentence-embedding model loaded into `retriever` below.
+EMB_MODEL_NAME = "thenlper/gte-base"
+# Name of the LanceDB table opened below.
+# NOTE(review): "Huggingface_docs" looks inherited from a template - confirm it is the
+# table that actually holds the customs manual passages.
+DB_TABLE_NAME = "Huggingface_docs"
+# Setting up the logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Embedding model exported for callers to encode queries with.
+retriever = SentenceTransformer(EMB_MODEL_NAME)
+# The database is a ".lancedb" directory located one level above this file's directory.
+# NOTE(review): this commit's file list does not include a .lancedb directory - confirm
+# the database is provisioned some other way before this module is imported.
+db_uri = os.path.join(Path(__file__).parents[1], ".lancedb")
+db = lancedb.connect(db_uri)
+# Table handle exported for callers to run vector searches on.
+table = db.open_table(DB_TABLE_NAME)

logo.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+# Python dependencies for app.py and the backend package.
+# NOTE(review): app.py also imports gradio, jinja2 and numpy, which are not listed here -
+# presumably they come from the hosting environment or as transitive dependencies; confirm.
+transformers[torch,sentencepiece]
+wikiextractor==3.0.6
+sentence-transformers>2.2.0
+ipywidgets==8.1.1
+tqdm==4.66.1
+aiohttp==3.8.6
+huggingface-hub==0.17.3
+lancedb
+openai==0.28