AlexanderKazakov committed
Commit: 360f505
Parent(s): eba1a12

make it work in zero draft
Files changed:
- .idea/rag-gradio-sample-project.iml +1 -0
- gradio_app/app.py +31 -34
- gradio_app/backend/query_llm.py +10 -13
- gradio_app/backend/semantic_search.py +5 -9
- gradio_app/templates/{template.j2 → prompt_template.j2} +0 -0
- prep_scripts/lancedb_setup.py +22 -13
- prep_scripts/markdown_to_text.py +9 -8
- settings.py +8 -0
.idea/rag-gradio-sample-project.iml
CHANGED
@@ -2,6 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/data" />
       <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
     <orderEntry type="jdk" jdkName="Python 3.11 (rag-gradio-sample-project) (2)" jdkType="Python SDK" />
gradio_app/app.py
CHANGED
@@ -6,28 +6,25 @@ Credit to Derek Thomas, derek@huggingface.co
 # subprocess.run(["pip", "install", "--upgrade", "transformers[torch,sentencepiece]==4.34.1"])
 
 import logging
-from pathlib import Path
 from time import perf_counter
 
 import gradio as gr
 from jinja2 import Environment, FileSystemLoader
 
 from backend.query_llm import generate_hf, generate_openai
-
-
-TEXT_COLUMN_NAME = ""
-
-proj_dir = Path(__file__).parent
+from backend.semantic_search import table, embedder
+
+from settings import *
+
 # Setting up the logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 # Set up the template environment with the templates directory
-env = Environment(loader=FileSystemLoader(
+env = Environment(loader=FileSystemLoader('gradio_app/templates'))
 
 # Load the templates directly from the environment
-
+prompt_template = env.get_template('prompt_template.j2')
 template_html = env.get_template('template_html.j2')
 
 # Examples
@@ -47,34 +44,34 @@ def bot(history, api_kind):
     query = history[-1][0]
 
     if not query:
-
-
+        gr.Warning("Please submit a non-empty string as a prompt")
+        raise ValueError("Empty string was submitted")
 
-    logger.
+    logger.info('Retrieving documents...')
     # Retrieve documents relevant to query
     document_start = perf_counter()
 
-    query_vec =
+    query_vec = embedder.encode(query)
     documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
     documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
 
     document_time = perf_counter() - document_start
-    logger.
+    logger.info(f'Finished Retrieving documents in {round(document_time, 2)} seconds...')
 
     # Create Prompt
-    prompt =
+    prompt = prompt_template.render(documents=documents, query=query)
     prompt_html = template_html.render(documents=documents, query=query)
 
     if api_kind == "HuggingFace":
-
+        generate_fn = generate_hf
     elif api_kind == "OpenAI":
-
+        generate_fn = generate_openai
     elif api_kind is None:
-
-
+        gr.Warning("API name was not provided")
+        raise ValueError("API name was not provided")
     else:
-
-
+        gr.Warning(f"API {api_kind} is not supported")
+        raise ValueError(f"API {api_kind} is not supported")
 
     history[-1][1] = ""
     for character in generate_fn(prompt, history[:-1]):
@@ -84,22 +81,22 @@ def bot(history, api_kind):
 
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot(
-
-
-
-
-
-
-
-
+        [],
+        elem_id="chatbot",
+        avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
+                       'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
+        bubble_full_width=False,
+        show_copy_button=True,
+        show_share_button=True,
+    )
 
     with gr.Row():
         txt = gr.Textbox(
-
-
-
-
-
+            scale=3,
+            show_label=False,
+            placeholder="Enter text and press enter",
+            container=False,
+        )
        txt_btn = gr.Button(value="Submit text", scale=1)
 
     api_kind = gr.Radio(choices=["HuggingFace", "OpenAI"], value="HuggingFace")
@@ -107,14 +104,14 @@ with gr.Blocks() as demo:
     prompt_html = gr.HTML()
     # Turn off interactivity while generating if you click
     txt_msg = txt_btn.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
-
+        bot, [chatbot, api_kind], [chatbot, prompt_html])
 
     # Turn it back on
     txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
 
     # Turn off interactivity while generating if you hit enter
     txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
-
+        bot, [chatbot, api_kind], [chatbot, prompt_html])
 
     # Turn it back on
     txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
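A note on the new loader: `FileSystemLoader('gradio_app/templates')` resolves against the current working directory, so the app presumably has to be launched from the repository root. A minimal sketch of the templating step this diff completes; the documents and query values here are made up for illustration:

    from jinja2 import Environment, FileSystemLoader

    # Loader path is relative to the working directory, so this only resolves
    # when run from the repository root (assumption based on this diff).
    env = Environment(loader=FileSystemLoader('gradio_app/templates'))
    prompt_template = env.get_template('prompt_template.j2')

    # Render with illustrative values; in app.py these come from retrieval.
    prompt = prompt_template.render(documents=["chunk one", "chunk two"],
                                    query="What does pipeline() do?")
    print(prompt)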
gradio_app/backend/query_llm.py
CHANGED
@@ -7,19 +7,16 @@ from typing import Any, Dict, Generator, List
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
 
-
+from settings import *
 
-
-
-repetition_penalty = 1.2
+
+tokenizer = AutoTokenizer.from_pretrained(LLM_NAME)
 
 OPENAI_KEY = getenv("OPENAI_API_KEY")
 HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
 
-
-
-    token=HF_TOKEN
-)
+
+hf_client = InferenceClient(LLM_NAME, token=HF_TOKEN)
 
 
 def format_prompt(message: str, api_kind: str):
@@ -40,12 +37,12 @@ def format_prompt(message: str, api_kind: str):
         return messages
     elif api_kind == "hf":
         return tokenizer.apply_chat_template(messages, tokenize=False)
-
+    else:
         raise ValueError("API is not supported")
 
 
-def generate_hf(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int =
-                top_p: float = 0.
+def generate_hf(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 512,
+                top_p: float = 0.6, repetition_penalty: float = 1.2) -> Generator[str, None, str]:
     """
     Generate a sequence of tokens based on a given prompt and history using Mistral client.
 
@@ -99,8 +96,8 @@ def generate_hf(prompt: str, history: str, temperature: float = 0.9, max_new_tok
     return "I do not know what happened, but I couldn't understand you."
 
 
-def generate_openai(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int =
-                    top_p: float = 0.
+def generate_openai(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 512,
+                    top_p: float = 0.6, repetition_penalty: float = 1.2) -> Generator[str, None, str]:
     """
     Generate a sequence of tokens based on a given prompt and history using Mistral client.
 
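The bodies of generate_hf and generate_openai are unchanged by this commit and not shown here. Against the new signature, a streaming loop built on huggingface_hub's InferenceClient.text_generation would look roughly like this sketch (not the file's actual body; `history` is ignored for brevity):

    def generate_hf_sketch(prompt: str, history: str, temperature: float = 0.9,
                           max_new_tokens: int = 512, top_p: float = 0.6,
                           repetition_penalty: float = 1.2):
        # Stream tokens from the hosted model and yield the growing string,
        # which is how a Gradio chatbot animates a partial answer.
        stream = hf_client.text_generation(
            format_prompt(prompt, "hf"),
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            stream=True,
            details=True,
        )
        output = ""
        for chunk in stream:
            output += chunk.token.text
            yield output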
gradio_app/backend/semantic_search.py
CHANGED
@@ -1,18 +1,14 @@
 import logging
 import lancedb
-import os
-from pathlib import Path
 from sentence_transformers import SentenceTransformer
 
-
-
+from settings import *
+
 
 # Setting up the logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-
+embedder = SentenceTransformer(EMB_MODEL_NAME)
 
-
-
-db = lancedb.connect(db_uri)
-table = db.open_table(DB_TABLE_NAME)
+db = lancedb.connect(LANCEDB_DIRECTORY)
+table = db.open_table(LANCEDB_TABLE_NAME)
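Importing this module now loads the embedding model and opens the LanceDB table once at startup. A quick usage sketch against the new module-level objects (the query text is illustrative):

    from backend.semantic_search import table, embedder
    from settings import VECTOR_COLUMN_NAME, TEXT_COLUMN_NAME

    # Embed a query and pull the three nearest chunks from the table.
    vec = embedder.encode("What is attention?")
    hits = table.search(vec, vector_column_name=VECTOR_COLUMN_NAME).limit(3).to_list()
    print([hit[TEXT_COLUMN_NAME][:80] for hit in hits])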
gradio_app/templates/{template.j2 → prompt_template.j2}
RENAMED
File without changes
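The template's contents are untouched by the rename and not shown in this commit. Given that the render calls in app.py pass documents and query, a RAG prompt template of roughly this shape would fit (hypothetical contents, not the actual file):

    {# Hypothetical prompt_template.j2; the real file is not shown here. #}
    Context:
    {% for doc in documents %}
    ---
    {{ doc }}
    {% endfor %}
    ---
    Answer the question using only the context above.
    Question: {{ query }}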
prep_scripts/lancedb_setup.py
CHANGED
@@ -1,3 +1,6 @@
+import shutil
+import traceback
+
 import lancedb
 import torch
 import pyarrow as pa
@@ -8,13 +11,16 @@ import numpy as np
 
 from sentence_transformers import SentenceTransformer
 
-
-
-VECTOR_COLUMN_NAME = ""
-TEXT_COLUMN_NAME = ""
-INPUT_DIR = "<chunked docs directory>"
-db = lancedb.connect(".lancedb")  # db location
+from settings import *
+
+
+emb_sizes = {
+    "sentence-transformers/all-MiniLM-L6-v2": 384,
+    "thenlper/gte-large": 0
+}
+
+shutil.rmtree(LANCEDB_DIRECTORY, ignore_errors=True)
+db = lancedb.connect(LANCEDB_DIRECTORY)
 batch_size = 32
 
 model = SentenceTransformer(EMB_MODEL_NAME)
@@ -29,17 +35,17 @@ else:
 
 schema = pa.schema(
     [
-        pa.field(VECTOR_COLUMN_NAME, pa.list_(pa.float32(),
+        pa.field(VECTOR_COLUMN_NAME, pa.list_(pa.float32(), emb_sizes[EMB_MODEL_NAME])),
         pa.field(TEXT_COLUMN_NAME, pa.string())
     ])
-tbl = db.create_table(
+tbl = db.create_table(LANCEDB_TABLE_NAME, schema=schema, mode="overwrite")
 
-input_dir = Path(
+input_dir = Path(TEXT_CHUNKS_DIR)
 files = list(input_dir.rglob("*"))
 
 sentences = []
 for file in files:
-    with open(file) as f:
+    with open(file, encoding='utf-8') as f:
         sentences.append(f.read())
 
 for i in tqdm.tqdm(range(0, int(np.ceil(len(sentences) / batch_size)))):
@@ -54,12 +60,15 @@ for i in tqdm.tqdm(range(0, int(np.ceil(len(sentences) / batch_size)))):
         })
 
         tbl.add(df)
+
     except:
-        print(f"batch {i} was skipped")
+        print(f"batch {i} was skipped: {traceback.format_exc()}")
+
 
 '''
 create ivf-pd index https://lancedb.github.io/lancedb/ann_indexes/
 with the size of the transformer docs, index is not really needed
-but we'll do it for
+but we'll do it for demonstration purposes
 '''
-tbl.create_index(num_partitions=256, num_sub_vectors=96, vector_column_name=VECTOR_COLUMN_NAME)
+# tbl.create_index(num_partitions=256, num_sub_vectors=96, vector_column_name=VECTOR_COLUMN_NAME)
+
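One caveat in the new code: emb_sizes maps "thenlper/gte-large" to a 0 placeholder, which would produce an unusable vector column if that model were ever selected. The width can instead be read from the loaded model itself; a sketch using the sentence-transformers API, with names taken from settings.py:

    from sentence_transformers import SentenceTransformer
    import pyarrow as pa

    from settings import EMB_MODEL_NAME, VECTOR_COLUMN_NAME, TEXT_COLUMN_NAME

    model = SentenceTransformer(EMB_MODEL_NAME)
    emb_dim = model.get_sentence_embedding_dimension()  # 384 for all-MiniLM-L6-v2

    # Schema derived from the model itself; no hard-coded emb_sizes dict needed.
    schema = pa.schema([
        pa.field(VECTOR_COLUMN_NAME, pa.list_(pa.float32(), emb_dim)),
        pa.field(TEXT_COLUMN_NAME, pa.string()),
    ])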
prep_scripts/markdown_to_text.py
CHANGED
@@ -1,12 +1,12 @@
+import shutil
+
 from bs4 import BeautifulSoup
 from markdown import markdown
 import os
 import re
 from pathlib import Path
 
-
-DIR_TO_SCRAPE = "data/transformers/docs/source/en/"
-OUTPUT_DIR = str(Path().resolve() / "docs_dump")
+from settings import *
 
 
 def markdown_to_text(markdown_string):
@@ -20,7 +20,7 @@ def markdown_to_text(markdown_string):
 
     # extract text
     soup = BeautifulSoup(html, "html.parser")
-    text = ''.join(soup.findAll(
+    text = ''.join(soup.findAll(string=True))
 
     text = re.sub('```(py|diff|python)', '', text)
     text = re.sub('```\n', '\n', text)
@@ -31,19 +31,20 @@ def markdown_to_text(markdown_string):
     return text
 
 
-dir_to_scrape = Path(
+dir_to_scrape = Path(MARKDOWN_DIR_TO_SCRAPE)
 files = list(dir_to_scrape.rglob("*"))
 
-
+shutil.rmtree(TEXT_CHUNKS_DIR, ignore_errors=True)
+os.makedirs(TEXT_CHUNKS_DIR)
 
 for file in files:
     parent = file.parent.stem if file.parent.stem != dir_to_scrape.stem else ""
     if file.is_file():
-        with open(file) as f:
+        with open(file, encoding='utf-8') as f:
             md = f.read()
 
         text = markdown_to_text(md)
 
-        with open(os.path.join(
+        with open(os.path.join(TEXT_CHUNKS_DIR, f"{parent}_{file.stem}.txt"), "w", encoding='utf-8') as f:
            f.write(text)
 
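A quick check of the completed extraction call (soup.findAll(string=True) replaces the truncated draft line); the resulting plain text is what gets written into TEXT_CHUNKS_DIR. Illustrative input, run in the context of this module:

    md = "# Title\n\nSome *emphasised* text.\n\n```py\nprint('hi')\n```\n"
    print(markdown_to_text(md))
    # -> plain text with markdown markup and code-fence markers stripped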
settings.py
ADDED
@@ -0,0 +1,8 @@
+MARKDOWN_DIR_TO_SCRAPE = "data/transformers/docs/source/en/"
+TEXT_CHUNKS_DIR = "data/docs_dump"
+EMB_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+LANCEDB_DIRECTORY = "data/lancedb"
+LANCEDB_TABLE_NAME = "table"
+VECTOR_COLUMN_NAME = "embedding"
+TEXT_COLUMN_NAME = "text"
+LLM_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
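Every path here is relative and the modules do `from settings import *`, so the scripts presumably have to run from the repository root with the root on PYTHONPATH. The prep order implied by the diffs, as a sketch:

    # Run from the repository root; PYTHONPATH="." so `from settings import *` resolves.
    import os
    import subprocess

    env = {**os.environ, "PYTHONPATH": "."}
    subprocess.run(["python", "prep_scripts/markdown_to_text.py"], check=True, env=env)  # markdown -> text chunks
    subprocess.run(["python", "prep_scripts/lancedb_setup.py"], check=True, env=env)     # embed chunks -> LanceDB
    # then launch the UI: python gradio_app/app.py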