Aragoner committed (verified)
Commit 7e92170 · 1 Parent(s): 039472c

Upload 8 files

backend/.DS_Store ADDED
Binary file (6.15 kB). View file
 
backend/__pycache__/query_llm.cpython-310.pyc ADDED
Binary file (4.03 kB). View file
 
backend/__pycache__/query_llm.cpython-37.pyc ADDED
Binary file (4.01 kB). View file
 
backend/__pycache__/query_llm.cpython-39.pyc ADDED
Binary file (4.04 kB). View file
 
backend/__pycache__/semantic_search.cpython-310.pyc ADDED
Binary file (1.18 kB). View file
 
backend/__pycache__/semantic_search.cpython-39.pyc ADDED
Binary file (2.14 kB). View file
 
backend/query_llm.py ADDED
@@ -0,0 +1,129 @@
+ import openai
+ import gradio as gr
+ import os
+
+ from typing import Any, Dict, Generator, List
+
+ from huggingface_hub import InferenceClient
+ from transformers import AutoTokenizer
+
+ from dotenv import load_dotenv
+ load_dotenv()
+ OPENAI_KEY = os.getenv("OPENAI_API_KEY")
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ TOKENIZER = AutoTokenizer.from_pretrained(os.getenv("HF_MODEL"))
+
+ HF_CLIENT = InferenceClient(
+     os.getenv("HF_MODEL"),
+     token=HF_TOKEN
+ )
+ OAI_CLIENT = openai.Client(api_key=OPENAI_KEY)
+
+ HF_GENERATE_KWARGS = {
+     'temperature': max(float(os.getenv("TEMPERATURE", 0.9)), 1e-2),
+     'max_new_tokens': int(os.getenv("MAX_NEW_TOKENS", 256)),
+     'top_p': float(os.getenv("TOP_P", 0.6)),
+     'repetition_penalty': float(os.getenv("REP_PENALTY", 1.2)),
+     'do_sample': os.getenv("DO_SAMPLE", "true").lower() in ("1", "true", "yes")  # parse the env string; bool() on any non-empty string is always True
+ }
+
+ OAI_GENERATE_KWARGS = {
+     'temperature': max(float(os.getenv("TEMPERATURE", 0.9)), 1e-2),
+     'max_tokens': int(os.getenv("MAX_NEW_TOKENS", 256)),
+     'top_p': float(os.getenv("TOP_P", 0.6)),
+     'frequency_penalty': max(-2, min(float(os.getenv("FREQ_PENALTY", 0)), 2))
+ }
+
+
+ def format_prompt(message: str, api_kind: str):
+     """
+     Formats the given message using a chat template.
+
+     Args:
+         message (str): The user message to be formatted.
+         api_kind (str): LLM API provider, either "openai" or "hf".
+     Returns:
+         A list of chat messages for OpenAI, or a template-formatted string for HF.
+     """
+
+     # Create a list of message dictionaries with role and content
+     messages: List[Dict[str, Any]] = [{'role': 'user', 'content': message}]
+
+     if api_kind == "openai":
+         return messages
+     elif api_kind == "hf":
+         return TOKENIZER.apply_chat_template(messages, tokenize=False)
+     else:
+         raise ValueError("API is not supported")
+
+
+ def generate_hf(prompt: str, history: str) -> Generator[str, None, str]:
+     """
+     Generate a sequence of tokens based on a given prompt and history using the Hugging Face Inference client.
+
+     Args:
+         prompt (str): The prompt for the text generation.
+         history (str): Context or history for the text generation.
+     Returns:
+         Generator[str, None, str]: A generator yielding chunks of generated text.
+         Raises gr.Error if the request fails.
+     """
+
+     formatted_prompt = format_prompt(prompt, "hf")
+     formatted_prompt = formatted_prompt.encode("utf-8").decode("utf-8")
+
+     try:
+         stream = HF_CLIENT.text_generation(
+             formatted_prompt,
+             **HF_GENERATE_KWARGS,
+             stream=True,
+             details=True,
+             return_full_text=False
+         )
+         output = ""
+         for response in stream:
+             output += response.token.text
+             yield output
+
+     except Exception as e:
+         if "Too Many Requests" in str(e):
+             raise gr.Error(f"Too many requests: {str(e)}")
+         elif "Authorization header is invalid" in str(e):
+             raise gr.Error("Authentication error: HF token was either not provided or incorrect")
+         else:
+             raise gr.Error(f"Unhandled Exception: {str(e)}")
+
+
+ def generate_openai(prompt: str, history: str) -> Generator[str, None, str]:
+     """
+     Generate a sequence of tokens based on a given prompt and history using the OpenAI client.
+
+     Args:
+         prompt (str): The initial prompt for the text generation.
+         history (str): Context or history for the text generation.
+     Returns:
+         Generator[str, None, str]: A generator yielding chunks of generated text.
+         Raises gr.Error if the request fails.
+     """
+     formatted_prompt = format_prompt(prompt, "openai")
+
+     try:
+         stream = OAI_CLIENT.chat.completions.create(
+             model=os.getenv("OPENAI_MODEL"),
+             messages=formatted_prompt,
+             **OAI_GENERATE_KWARGS,
+             stream=True
+         )
+         output = ""
+         for chunk in stream:
+             if chunk.choices[0].delta.content:
+                 output += chunk.choices[0].delta.content
+                 yield output
+
+     except Exception as e:
+         if "Too Many Requests" in str(e):
+             raise gr.Error("ERROR: Too many requests on OpenAI client")
+         elif "You didn't provide an API key" in str(e):
+             raise gr.Error("Authentication error: OpenAI key was either not provided or incorrect")
+         else:
+             raise gr.Error(f"Unhandled Exception: {str(e)}")
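Both generators stream by yielding the accumulated output so far, which is the shape Gradio's streaming chat components expect. A minimal sketch of calling them directly, assuming a .env that defines HF_MODEL, HF_TOKEN, OPENAI_MODEL and OPENAI_API_KEY (none of those values appear in this commit), and assuming the module is importable as backend.query_llm:

# Sketch only: the .env contents and the import path are assumptions, not part of this commit.
from backend.query_llm import generate_hf, generate_openai

question = "What is retrieval-augmented generation?"

# Each yield is the full text generated so far, not a single new token,
# so the last yielded value is the complete answer.
answer = ""
for partial in generate_hf(question, history=""):  # or generate_openai(question, history="")
    answer = partial
print(answer)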
backend/semantic_search.py ADDED
@@ -0,0 +1,59 @@
+ import lancedb
+ import os
+ import gradio as gr
+ from sentence_transformers import SentenceTransformer
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import torch
+ import time
+ from pathlib import Path
+
+ db = lancedb.connect(".lancedb")
+
+ TABLE = db.open_table(os.getenv("TABLE_NAME"))
+ VECTOR_COLUMN = os.getenv("VECTOR_COLUMN", "vector")
+ TEXT_COLUMN = os.getenv("TEXT_COLUMN", "text")
+ BATCH_SIZE = int(os.getenv("BATCH_SIZE", 32))
+ CROSS_ENCODER = os.getenv("CROSS_ENCODER")
+
+ retriever = SentenceTransformer(os.getenv("EMB_MODEL"))
+ cross_encoder = AutoModelForSequenceClassification.from_pretrained(CROSS_ENCODER)
+ cross_encoder.eval()
+ cross_encoder_tokenizer = AutoTokenizer.from_pretrained(CROSS_ENCODER)
+
+
+ def rerank(query, documents, k):
+     """Use a cross-encoder to rerank documents retrieved from the retriever and keep the top k."""
+     tokens = cross_encoder_tokenizer([query] * len(documents), documents, padding=True, truncation=True, return_tensors="pt")
+     with torch.no_grad():
+         logits = cross_encoder(**tokens).logits
+     scores = logits.reshape(-1).tolist()
+     documents = sorted(zip(documents, scores), key=lambda x: x[1], reverse=True)
+     return [doc[0] for doc in documents[:k]]
+
+
+ # def retrieve(query, k):
+ #     query_vec = retriever.encode(query)
+ #     try:
+ #         documents = TABLE.search(query_vec, vector_column_name=VECTOR_COLUMN).limit(k).to_list()
+ #         documents = [doc[TEXT_COLUMN] for doc in documents]
+ #
+ #         return documents
+ #
+ #     except Exception as e:
+ #         raise gr.Error(str(e))
+
+
+ def retrieve(query, top_k_retriever=25, use_reranking=True, top_k_reranker=5):
+     """Vector-search the LanceDB table for top_k_retriever chunks, then optionally rerank and keep top_k_reranker."""
+     query_vec = retriever.encode(query)
+     try:
+         documents = TABLE.search(query_vec, vector_column_name=VECTOR_COLUMN).limit(top_k_retriever).to_list()
+         documents = [doc[TEXT_COLUMN] for doc in documents]
+
+         if use_reranking:
+             documents = rerank(query, documents, top_k_reranker)
+
+         return documents
+
+     except Exception as e:
+         raise gr.Error(str(e))
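Together the two files cover the retrieval and generation halves of the app: retrieve returns the reranked text chunks, and the caller folds them into a prompt for one of the generators in query_llm.py. A rough sketch of that wiring, where the prompt template and import paths are assumptions rather than part of this commit:

# Sketch only: the prompt template and import paths are assumptions, not part of this commit.
from backend.semantic_search import retrieve
from backend.query_llm import generate_openai

query = "How do I add documents to the LanceDB table?"

# 25 candidates from vector search; the cross-encoder keeps the best 5.
docs = retrieve(query, top_k_retriever=25, use_reranking=True, top_k_reranker=5)

context = "\n\n".join(docs)
prompt = (
    "Answer the question using only the context below.\n\n"
    f"Context:\n{context}\n\n"
    f"Question: {query}"
)

answer = ""
for partial in generate_openai(prompt, history=""):
    answer = partial
print(answer)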