ccm committed on
Commit 206bc94 · verified
1 Parent(s): 9cdb1a5

Update app.py

Files changed (1)
  1. app.py +92 -52
app.py CHANGED
@@ -1,71 +1,104 @@
+"""
+This script sets up a Gradio interface for querying an AI assistant about additive manufacturing research.
+It uses a vectorstore to retrieve relevant research excerpts and a language model to generate responses.
+
+Modules:
+- gradio: Interface handling
+- spaces: For GPU
+- transformers: LLM Loading
+- langchain_community.vectorstores: Vectorstore for publications
+- langchain_huggingface: Embeddings
+
+Constants:
+- PUBLICATIONS_TO_RETRIEVE: The number of publications to retrieve for the prompt
+- RAG_TEMPLATE: The template for the RAG prompt
+
+Functions:
+- preprocess(query: str) -> str: Generates a prompt based on the top k documents matching the query.
+- reply(message: str, history: list[str]) -> str: Generates a response to the user’s message.
+
+Example Queries:
+- "What is multi-material 3D printing?"
+- "How is additive manufacturing being applied in aerospace?"
+- "Tell me about innovations in metal 3D printing techniques."
+- "What are some sustainable materials for 3D printing?"
+- "What are the biggest challenges with support structures in additive manufacturing?"
+- "How is 3D printing impacting the medical field?"
+- "What are some common applications of additive manufacturing in industry?"
+- "What are the benefits and limitations of using polymers in 3D printing?"
+- "Tell me about the environmental impacts of additive manufacturing."
+- "What are the primary limitations of current 3D printing technologies?"
+- "How are researchers improving the speed of 3D printing processes?"
+- "What are the best practices for managing post-processing in additive manufacturing?"
+"""
+
 import gradio # Interface handling
 import spaces # For GPU
 import transformers # LLM Loading
 import langchain_community.vectorstores # Vectorstore for publications
 import langchain_huggingface # Embeddings
 
+# The number of publications to retrieve for the prompt
+PUBLICATIONS_TO_RETRIEVE = 5
 
-# Greeting message
-GREETING = (
-    "Howdy! I'm an AI agent that uses "
-    "[retrieval-augmented generation](https://en.wikipedia.org/wiki/Retrieval-augmented_generation) "
-    "to answer questions about additive manufacturing research. "
-    "I'm still improving, so bear with me if I make any mistakes. "
-    "What can I help you with today?"
-)
-
-# Constants
-EMBEDDING_MODEL_NAME = "all-MiniLM-L12-v2"
-LLM_MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
-PUBLICATIONS_TO_RETRIEVE = 10
+# The template for the RAG prompt
+RAG_TEMPLATE = """You are an AI assistant who enjoys helping users learn about research.
+Answer the USER_QUERY on additive manufacturing research using the RESEARCH_EXCERPTS.
+Provide a concise ANSWER based on these excerpts. Avoid listing references.
 
+===== RESEARCH_EXCERPTS =====
+{research_excerpts}
 
-def embedding(
-    device: str = "cuda", normalize_embeddings: bool = False
-) -> langchain_huggingface.HuggingFaceEmbeddings:
-    """Loads embedding model with specified device and normalization."""
-    return langchain_huggingface.HuggingFaceEmbeddings(
-        model_name=EMBEDDING_MODEL_NAME,
-        model_kwargs={"device": device},
-        encode_kwargs={"normalize_embeddings": normalize_embeddings},
-    )
-
+===== USER_QUERY =====
+{query}
 
-def load_publication_vectorstore() -> langchain_community.vectorstores.FAISS:
-    """Load the publication vectorstore safely."""
-    return langchain_community.vectorstores.FAISS.load_local(
-        folder_path="publication_vectorstore",
-        embeddings=embedding(),
-        allow_dangerous_deserialization=True,
-    )
+===== ANSWER =====
+"""
 
+# Load vectorstore of SFF publications
+publication_vectorstore = langchain_community.vectorstores.FAISS.load_local(
+    folder_path="publication_vectorstore",
+    embeddings=langchain_huggingface.HuggingFaceEmbeddings(
+        model_name="all-MiniLM-L12-v2",
+        model_kwargs={"device": "cuda"},
+        encode_kwargs={"normalize_embeddings": False},
+    ),
+    allow_dangerous_deserialization=True,
+)
 
-# Load vectorstore and models
-publication_vectorstore = load_publication_vectorstore()
+# Create the callable LLM
+llm = transformers.pipeline(
+    "text-generation", model="Qwen/Qwen2.5-7B-Instruct", device="cuda"
+)
 
 
-def preprocess(query: str, k: int) -> str:
+def preprocess(query: str) -> str:
     """
     Generates a prompt based on the top k documents matching the query.
+
+    Args:
+        query (str): The user's query.
+
+    Returns:
+        str: The formatted prompt containing research excerpts and the user's query.
     """
-    documents = publication_vectorstore.search(query, k=k, search_type="similarity")
-    research_excerpts = [f'"... {doc.page_content}..."' for doc in documents]
 
-    # Prompt template
-    prompt_template = (
-        "You are an AI assistant who enjoys helping users learn about research. "
-        "Answer the following question on additive manufacturing research using the RESEARCH_EXCERPTS. "
-        "Provide a concise ANSWER based on these excerpts. Avoid listing references.\n\n"
-        "===== RESEARCH_EXCERPTS =====\n{research_excerpts}\n\n"
-        "===== USER_QUERY =====\n{query}\n\n"
-        "===== ANSWER =====\n"
+    # Search for the top k documents matching the query
+    documents = publication_vectorstore.search(
+        query, k=PUBLICATIONS_TO_RETRIEVE, search_type="similarity"
     )
 
-    prompt = prompt_template.format(
+    # Extract the page content from the documents
+    research_excerpts = [f'"... {doc.page_content}..."' for doc in documents]
+
+    # Format the prompt with the research excerpts and the user's query
+    prompt = RAG_TEMPLATE.format(
         research_excerpts="\n\n".join(research_excerpts), query=query
     )
 
-    print(prompt) # Useful for debugging prompt content
+    # Print the prompt for debugging purposes
+    print(prompt)
+
     return prompt
 
 
@@ -73,15 +106,22 @@ def preprocess(query: str, k: int) -> str:
 def reply(message: str, history: list[str]) -> str:
     """
     Generates a response to the user’s message.
-    """
-    # Preprocess message
 
-    pipe = transformers.pipeline(
-        "text-generation", model="Qwen/Qwen2.5-7B-Instruct", device="cuda"
-    )
+    Args:
+        message (str): The user's message or query.
+        history (list[str]): The conversation history.
+
+    Returns:
+        str: The generated response from the language model.
+    """
 
-    message = preprocess(message, PUBLICATIONS_TO_RETRIEVE)
-    return pipe(message, max_new_tokens=512, return_full_text=False)[0]["generated_text"]
+    return llm(
+        preprocess(message),
+        max_new_tokens=512,
+        return_full_text=False,
+    )[
+        0
+    ]["generated_text"]
 
 
 # Example Queries for Interface
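
Note: the diff stops at the "# Example Queries for Interface" comment, so the Gradio wiring itself is not part of this commit. Below is a minimal sketch of how the refactored reply function could be exposed; the EXAMPLE_QUERIES list, the demo variable, and the title string are illustrative assumptions, not lines from app.py.

# Sketch only -- assumes example queries and interface setup follow the
# "# Example Queries for Interface" comment in the unchanged part of app.py.
EXAMPLE_QUERIES = [
    "What is multi-material 3D printing?",
    "How is additive manufacturing being applied in aerospace?",
    "What are some sustainable materials for 3D printing?",
]

# In a ZeroGPU Space, reply is typically decorated with @spaces.GPU so that
# generation runs on GPU; that decorator is not visible in this diff.

# ChatInterface passes (message, history) to its fn, matching reply's signature.
demo = gradio.ChatInterface(
    fn=reply,
    examples=EXAMPLE_QUERIES,
    title="Additive Manufacturing Research Assistant",  # assumed title
)

demo.launch()  # start the Gradio app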