ccm committed
Commit 1d48be6 · verified · 1 Parent(s): 306420e

Update app.py

Files changed (1)
  1. app.py +12 -16
app.py CHANGED
@@ -16,12 +16,12 @@ GREETING = (
 
 # Constants
 EMBEDDING_MODEL_NAME = "all-MiniLM-L12-v2"
-LLM_MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
+LLM_MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
 PUBLICATIONS_TO_RETRIEVE = 10
 
 
 def embedding(
-    device: str = "cuda", normalize_embeddings: bool = False
+    device: str = "mps", normalize_embeddings: bool = False
 ) -> langchain_huggingface.HuggingFaceEmbeddings:
     """Loads embedding model with specified device and normalization."""
     return langchain_huggingface.HuggingFaceEmbeddings(
@@ -33,15 +33,11 @@ def embedding(
 
 def load_publication_vectorstore() -> langchain_community.vectorstores.FAISS:
     """Load the publication vectorstore safely."""
-    try:
-        return langchain_community.vectorstores.FAISS.load_local(
-            folder_path="publication_vectorstore",
-            embeddings=embedding(),
-            allow_dangerous_deserialization=True,
-        )
-    except Exception as e:
-        print(f"Error loading vectorstore: {e}")
-        return None
+    return langchain_community.vectorstores.FAISS.load_local(
+        folder_path="publication_vectorstore",
+        embeddings=embedding(),
+        allow_dangerous_deserialization=True,
+    )
 
 
 # Load vectorstore and models
@@ -60,9 +56,9 @@ def preprocess(query: str, k: int) -> str:
         "You are an AI assistant who enjoys helping users learn about research. "
         "Answer the following question on additive manufacturing research using the RESEARCH_EXCERPTS. "
         "Provide a concise ANSWER based on these excerpts. Avoid listing references.\n\n"
-        "===== RESEARCH_EXCERPTS =====:\n{research_excerpts}\n\n"
-        "===== USER_QUERY =====:\n{query}\n\n"
-        "===== ANSWER =====:\n"
+        "===== RESEARCH_EXCERPTS =====\n{research_excerpts}\n\n"
+        "===== USER_QUERY =====\n{query}\n\n"
+        "===== ANSWER =====\n"
     )
 
     prompt = prompt_template.format(
@@ -74,7 +70,7 @@ def preprocess(query: str, k: int) -> str:
 
 
 @spaces.GPU
-def reply(message: str, history: list[str]) -> str:
+def reply(message: str) -> str:
     """
     Generates a response to the user’s message.
     """
@@ -83,7 +79,7 @@ def reply(message: str, history: list[str]) -> str:
     pipe = transformers.pipeline("text-generation", model="Qwen/Qwen2.5-7B-Instruct")
 
     message = preprocess(message, PUBLICATIONS_TO_RETRIEVE)
-    return pipe(message, max_length=512)[0]["generated_text"]
+    return pipe(message, max_new_tokens=512, device="mps")[0]["generated_text"]
 
 
 # Example Queries for Interface
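
A note on the vectorstore hunk: the deleted try/except swallowed load failures and returned None, which only resurfaced later as an AttributeError at query time, so letting the error propagate at startup is the clearer behavior. If a guard is still wanted, a minimal sketch (keeping the publication_vectorstore folder, the embedding() helper, and the FAISS call from the diff) fails fast with an explicit message:

import pathlib

import langchain_community.vectorstores


def load_publication_vectorstore() -> langchain_community.vectorstores.FAISS:
    """Load the publication vectorstore, failing fast if the index is missing."""
    folder = pathlib.Path("publication_vectorstore")
    if not folder.is_dir():
        # A missing index should stop the app at startup, not at first query.
        raise FileNotFoundError(f"Vectorstore folder not found: {folder}")
    return langchain_community.vectorstores.FAISS.load_local(
        folder_path=str(folder),
        embeddings=embedding(),
        allow_dangerous_deserialization=True,  # FAISS indexes are pickled on disk
    )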
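Dropping the history parameter changes reply()'s calling convention. If this Space wires reply() into gr.ChatInterface (the interface code is outside this diff, so this is an assumption), that component always calls fn(message, history), and a small adapter keeps the two compatible:

import gradio as gr

# Hypothetical wiring, not shown in this commit: ChatInterface passes
# (message, history), so the unused argument is dropped before calling reply().
demo = gr.ChatInterface(fn=lambda message, history: reply(message))
demo.launch()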
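The switch from max_length to max_new_tokens matters for RAG: max_length caps prompt and completion together, so a prompt carrying ten retrieved excerpts could consume the whole 512-token budget and leave no room for the answer, while max_new_tokens reserves 512 tokens for the generated continuation alone. One loose end: reply() still hardcodes "Qwen/Qwen2.5-7B-Instruct", leaving the updated LLM_MODEL_NAME constant unused. A minimal sketch that reuses the constant and builds the pipeline once instead of on every call (return_full_text=False is an addition here, so the echoed prompt is stripped from the output):

pipe = transformers.pipeline("text-generation", model=LLM_MODEL_NAME)


def reply(message: str) -> str:
    """Generates a response to the user's message."""
    prompt = preprocess(message, PUBLICATIONS_TO_RETRIEVE)
    # max_new_tokens bounds only the completion; the long RAG prompt no
    # longer counts against the 512-token generation budget.
    return pipe(prompt, max_new_tokens=512, return_full_text=False)[0]["generated_text"]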
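Last, a caveat on device="mps": in transformers, device is an argument of the pipeline() constructor, not a call-time generation kwarg, and hardcoding Apple's Metal backend sits oddly next to @spaces.GPU, which provisions CUDA on Spaces hardware. A portable sketch (pick_device is a hypothetical helper, not part of the app):

import torch
import transformers


def pick_device() -> str:
    # Prefer CUDA (what @spaces.GPU provides), then Apple's MPS backend
    # for local runs, then fall back to CPU.
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"


# The device is passed where the pipeline expects it: at construction time.
pipe = transformers.pipeline(
    "text-generation", model=LLM_MODEL_NAME, device=pick_device()
)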