Spaces:

plaguss
/

argilla-sdk-chatbot-space

Runtime error

App Files Files Community

plaguss HF staff committed on Jun 28

Commit

68ffbe0

•

1 Parent(s): 4f3757c

Update app to register interactions in an argilla dataset

Browse files

Files changed (2) hide show

app.py +139 -47
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional, Any, Generator
 import os
 from pathlib import Path
 import tarfile
@@ -11,12 +11,14 @@ from huggingface_hub.file_download import hf_hub_download
 from huggingface_hub import InferenceClient, login
 from transformers import AutoTokenizer
 import gradio as gr
 @dataclass
 class Settings:
-    """Settings class to store useful variables for the App.
-    """
     LANCEDB: str = "lancedb"
     LANCEDB_FILE_TAR: str = "lancedb.tar.gz"
     TOKEN: str = os.getenv("HF_API_TOKEN")
@@ -24,13 +26,29 @@ class Settings:
     REPO_ID: str = "plaguss/argilla_sdk_docs_queries"
     TABLE_NAME: str = "docs"
     MODEL_NAME: str = "plaguss/bge-base-argilla-sdk-matryoshka"
-    DEVICE: str = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"
     MODEL_ID: str = "meta-llama/Meta-Llama-3-70B-Instruct"
 settings = Settings()
 login(token=settings.TOKEN)
 def untar_file(source: Path) -> Path:
     """Untar and decompress files which have passed by `make_tarfile`.
@@ -51,7 +69,7 @@ def download_database(
     repo_id: str,
     lancedb_file: str = "lancedb.tar.gz",
     local_dir: Path = Path.home() / ".cache/argilla_sdk_docs_db",
-    token: str = os.getenv("HF_API_TOKEN")
 ) -> Path:
     """Helper function to download the database. Will download a compressed lancedb stored
     in a Hugging Face repository.
@@ -69,18 +87,18 @@ def download_database(
     """
     lancedb_download = Path(
         hf_hub_download(
-            repo_id,
-            lancedb_file,
-            repo_type="dataset",
-            token=token,
-            local_dir=local_dir
         )
     )
     return untar_file(lancedb_download)
 # Get the model to create the embeddings
-model = get_registry().get("sentence-transformers").create(name=settings.MODEL_NAME, device=settings.DEVICE)
 class Database:
@@ -90,7 +108,12 @@ class Database:
     the expected location. Once ready, the only functionality available is
     to retrieve the doc chunks to be used as examples for the LLM.
     """
     def __init__(self, settings: Settings) -> None:
         self.settings = settings
         self._table: lancedb.table.LanceTable = self.get_table_from_db()
@@ -110,39 +133,56 @@ class Database:
                 self.settings.REPO_ID,
                 lancedb_file=self.settings.LANCEDB_FILE_TAR,
                 local_dir=self.settings.LOCAL_DIR,
-                token=self.settings.TOKEN
             )
         db = lancedb.connect(str(lancedb_db_path))
         table = db.open_table(self.settings.TABLE_NAME)
         return table
-    def retrieve_doc_chunks(self, query: str, limit: int = 12, hard_limit: int = 4) -> str:
-        """Search for similar queries in the database, and return a list with
-        TODO: SPLIT IN TWO SEPARATE FUNCTIONS TO PREPARE THE CONTEXT.
         Args:
-            query (str): _description_
-            limit (int, optional): _description_. Defaults to 12.
-            hard_limit (int, optional): _description_. Defaults to 4.
         Returns:
-            str: _description_
         """
-        # Embed the query to use our custom model instead of the default one.
         embedded_query = model.generate_embeddings([query])
         field_to_retrieve = "text"
         retrieved = (
-            self._table
-                .search(embedded_query[0])
-                .metric("cosine")
-                .limit(limit)
-                .select([field_to_retrieve])  # Just grab the chunk to use for context
-                .to_list()
         )
-        # We have repeated questions (up to 4) for a given chunk, so we may get repeated chunks.
-        # Request more than necessary and filter them afterwards
         responses = []
         unique_responses = set()
@@ -164,8 +204,7 @@ database = Database(settings=settings)
 def get_client_and_tokenizer(
-    model_id: str = settings.MODEL_ID,
-    tokenizer_id: Optional[str] = None
 ) -> tuple[InferenceClient, AutoTokenizer]:
     """Obtains the inference client and the tokenizer corresponding to the model.
@@ -182,14 +221,9 @@ def get_client_and_tokenizer(
         tokenizer_id = model_id
     client = InferenceClient()
-    base_url = client._resolve_url(
-        model=model_id, task="text-generation"
-    )
     # Note: We could move to the AsyncClient
-    client = InferenceClient(
-        model=base_url,
-        token=os.getenv("HF_API_TOKEN")
-    )
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
     return client, tokenizer
@@ -204,7 +238,9 @@ client_kwargs = {
     "temperature": 0.3,
     "top_p": None,
     "top_k": None,
-    "stop_sequences": ["<|eot_id|>", "<|end_of_text|>"] if settings.MODEL_ID.startswith("meta-llama/Meta-Llama-3") else None,
     "seed": None,
 }
@@ -313,6 +349,42 @@ def prepare_input(message: str, history: list[tuple[str, str]]) -> str:
     )[0]
 def chatty(message: str, history: list[tuple[str, str]]) -> Generator[str, None, None]:
     """Main function of the app, contains the interaction with the LLM.
@@ -326,28 +398,48 @@ def chatty(message: str, history: list[tuple[str, str]]) -> Generator[str, None,
     """
     prompt = prepare_input(message, history)
-    partial_message = ""
-    for token_stream in client.text_generation(prompt=prompt, **client_kwargs):
-        partial_message += token_stream
-        yield partial_message
 if __name__ == "__main__":
     import gradio as gr
     gr.ChatInterface(
         chatty,
-        chatbot=gr.Chatbot(height=600),
-        textbox=gr.Textbox(placeholder="Ask me about the new argilla SDK", container=False, scale=7),
         title="Argilla SDK Chatbot",
         description="Ask a question about Argilla SDK",
         theme="soft",
         examples=[
             "How can I connect to an argilla server?",
             "How can I access a dataset?",
-            "How can I get the current user?"
         ],
         cache_examples=True,
         retry_btn=None,

+from typing import Optional, Generator
 import os
 from pathlib import Path
 import tarfile
 from huggingface_hub import InferenceClient, login
 from transformers import AutoTokenizer
 import gradio as gr
+import argilla as rg
+import uuid
 @dataclass
 class Settings:
+    """Settings class to store useful variables for the App."""
     LANCEDB: str = "lancedb"
     LANCEDB_FILE_TAR: str = "lancedb.tar.gz"
     TOKEN: str = os.getenv("HF_API_TOKEN")
     REPO_ID: str = "plaguss/argilla_sdk_docs_queries"
     TABLE_NAME: str = "docs"
     MODEL_NAME: str = "plaguss/bge-base-argilla-sdk-matryoshka"
+    DEVICE: str = (
+        "mps"
+        if torch.backends.mps.is_available()
+        else "cuda"
+        if torch.cuda.is_available()
+        else "cpu"
+    )
     MODEL_ID: str = "meta-llama/Meta-Llama-3-70B-Instruct"
+    ARGILLA_URL = r"https://plaguss-argilla-sdk-chatbot.hf.space"
+    ARGILLA_API_KEY = os.getenv("ARGILLA_CHATBOT_API_KEY")
+    ARGILLA_DATASET = "chatbot_interactions"
 settings = Settings()
 login(token=settings.TOKEN)
+client_rg = rg.Argilla(
+    api_url=settings.ARGILLA_URL,
+    api_key=settings.ARGILLA_API_KEY
+)
+argilla_dataset = client_rg.datasets(settings.ARGILLA_DATASET)
 def untar_file(source: Path) -> Path:
     """Untar and decompress files which have passed by `make_tarfile`.
     repo_id: str,
     lancedb_file: str = "lancedb.tar.gz",
     local_dir: Path = Path.home() / ".cache/argilla_sdk_docs_db",
+    token: str = os.getenv("HF_API_TOKEN"),
 ) -> Path:
     """Helper function to download the database. Will download a compressed lancedb stored
     in a Hugging Face repository.
     """
     lancedb_download = Path(
         hf_hub_download(
+            repo_id, lancedb_file, repo_type="dataset", token=token, local_dir=local_dir
         )
     )
     return untar_file(lancedb_download)
 # Get the model to create the embeddings
+model = (
+    get_registry()
+    .get("sentence-transformers")
+    .create(name=settings.MODEL_NAME, device=settings.DEVICE)
+)
 class Database:
     the expected location. Once ready, the only functionality available is
     to retrieve the doc chunks to be used as examples for the LLM.
     """
     def __init__(self, settings: Settings) -> None:
+        """
+        Args:
+            settings: Instance of the settings.
+        """
         self.settings = settings
         self._table: lancedb.table.LanceTable = self.get_table_from_db()
                 self.settings.REPO_ID,
                 lancedb_file=self.settings.LANCEDB_FILE_TAR,
                 local_dir=self.settings.LOCAL_DIR,
+                token=self.settings.TOKEN,
             )
         db = lancedb.connect(str(lancedb_db_path))
         table = db.open_table(self.settings.TABLE_NAME)
         return table
+    def retrieve_doc_chunks(
+        self, query: str, limit: int = 12, hard_limit: int = 4
+    ) -> str:
+        """Search for similar queries in the database, and return the context to be passed
+        to the LLM.
         Args:
+            query: Query from the user.
+            limit: Number of similar items to retrieve. Defaults to 12.
+            hard_limit: Limit of responses to take into account.
+                As we generated repeated questions initially, the database may contain
+                repeated chunks of documents, in the initial `limit` selection, using
+                `hard_limit` we limit to this number the total of unique retrieved chunks.
+                Defaults to 4.
         Returns:
+            The context to be used by the model to generate the response.
         """
+        # Embed the query to use our custom model instead of the default one.
         embedded_query = model.generate_embeddings([query])
         field_to_retrieve = "text"
         retrieved = (
+            self._table.search(embedded_query[0])
+            .metric("cosine")
+            .limit(limit)
+            .select([field_to_retrieve])  # Just grab the chunk to use for context
+            .to_list()
         )
+        return self._prepare_context(retrieved, hard_limit)
+    @staticmethod
+    def _prepare_context(retrieved: list[dict[str, str]], hard_limit: int) -> str:
+        """Prepares the examples to be used in the LLM prompt.
+        Args:
+            retrieved: The list of retrieved chunks.
+            hard_limit: Max number of doc pieces to return.
+        Returns:
+            Context to be used by the LLM.
+        """
+        # We have repeated questions (up to 4) for a given chunk, so we may get repeated chunks.
+        # Request more than necessary and filter them afterwards
         responses = []
         unique_responses = set()
 def get_client_and_tokenizer(
+    model_id: str = settings.MODEL_ID, tokenizer_id: Optional[str] = None
 ) -> tuple[InferenceClient, AutoTokenizer]:
     """Obtains the inference client and the tokenizer corresponding to the model.
         tokenizer_id = model_id
     client = InferenceClient()
+    base_url = client._resolve_url(model=model_id, task="text-generation")
     # Note: We could move to the AsyncClient
+    client = InferenceClient(model=base_url, token=os.getenv("HF_API_TOKEN"))
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
     return client, tokenizer
     "temperature": 0.3,
     "top_p": None,
     "top_k": None,
+    "stop_sequences": ["<|eot_id|>", "<|end_of_text|>"]
+    if settings.MODEL_ID.startswith("meta-llama/Meta-Llama-3")
+    else None,
     "seed": None,
 }
     )[0]
+def create_chat_html(history: list[tuple[str, str]]) -> str:
+    """Helper function to create a conversation in HTML in argilla.
+    Each turn is rendered as one or two chat bubbles: the user message aligned
+    to the right and, when present, the assistant message aligned to the left.
+    Message contents are HTML-escaped, so text containing markup (e.g. a user
+    asking about `<div>` tags) is displayed literally instead of being
+    interpreted as HTML when the record is rendered.
+    Args:
+        history: History of messages with the chatbot, as (user, assistant)
+            tuples. The assistant entry may be None for a message that has
+            not been answered yet.
+    Returns:
+        HTML formatted conversation (an empty string for an empty history).
+    """
+    # Local import keeps this helper self-contained.
+    from html import escape
+    # (alignment, background color) per speaker: user first, assistant second.
+    styles = (("right", "#c2e3f7"), ("left", "#f5f5f5"))
+    bubbles = []
+    for user, assistant in history:
+        # To include a message still not answered, drop the missing assistant side.
+        contents = (user,) if assistant is None else (user, assistant)
+        for content, (alignment, color) in zip(contents, styles):
+            # Create the HTML message div with inline styles
+            bubbles.append(
+                f'<div style="display: flex; justify-content: {alignment}; margin: 10px;">'
+                f'<div style="background-color: {color}; padding: 10px; border-radius: 10px; '
+                f'max-width: 70%; word-wrap: break-word;">{escape(str(content))}</div>'
+                "</div>"
+            )
+    # Join once instead of growing a string with repeated concatenation.
+    return "".join(bubbles)
+conv_id = str(uuid.uuid4())
 def chatty(message: str, history: list[tuple[str, str]]) -> Generator[str, None, None]:
     """Main function of the app, contains the interaction with the LLM.
     """
     prompt = prepare_input(message, history)
+    partial_response = ""
+    for token_stream in client.text_generation(prompt=prompt, **client_kwargs):
+        partial_response += token_stream
+        yield partial_response
+    global conv_id
+    new_conversation = len(history) == 0
+    if new_conversation:
+        conv_id = str(uuid.uuid4())
+    else:
+        history.append((message, None))
+    # Register to argilla dataset
+    argilla_dataset.records.log(
+        [
+            {
+                "instruction": create_chat_html(history) if history else message,
+                "response": partial_response,
+                "conv_id": conv_id,
+                "turn": len(history)
+            },
+        ]
+    )
 if __name__ == "__main__":
     import gradio as gr
     gr.ChatInterface(
         chatty,
+        chatbot=gr.Chatbot(height=700),
+        textbox=gr.Textbox(
+            placeholder="Ask me about the new argilla SDK", container=False, scale=7
+        ),
         title="Argilla SDK Chatbot",
         description="Ask a question about Argilla SDK",
         theme="soft",
         examples=[
             "How can I connect to an argilla server?",
             "How can I access a dataset?",
+            "How can I get the current user?",
         ],
         cache_examples=True,
         retry_btn=None,

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 torch==2.3.1
 lancedb==0.8.2
-sentence-transformers==3.0.1

 torch==2.3.1
 lancedb==0.8.2
+sentence-transformers==3.0.1
+argilla==2.0.0rc1