ccm committed on
Commit
316ac93
·
verified ·
1 Parent(s): 1d48be6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -21,7 +21,7 @@ PUBLICATIONS_TO_RETRIEVE = 10
21
 
22
 
23
  def embedding(
24
- device: str = "mps", normalize_embeddings: bool = False
25
  ) -> langchain_huggingface.HuggingFaceEmbeddings:
26
  """Loads embedding model with specified device and normalization."""
27
  return langchain_huggingface.HuggingFaceEmbeddings(
@@ -70,16 +70,18 @@ def preprocess(query: str, k: int) -> str:
70
 
71
 
72
  @spaces.GPU
73
- def reply(message: str) -> str:
74
  """
75
  Generates a response to the user’s message.
76
  """
77
  # Preprocess message
78
 
79
- pipe = transformers.pipeline("text-generation", model="Qwen/Qwen2.5-7B-Instruct")
 
 
80
 
81
  message = preprocess(message, PUBLICATIONS_TO_RETRIEVE)
82
- return pipe(message, max_new_tokens=512, device="mps")[0]["generated_text"]
83
 
84
 
85
  # Example Queries for Interface
 
21
 
22
 
23
  def embedding(
24
+ device: str = "cuda", normalize_embeddings: bool = False
25
  ) -> langchain_huggingface.HuggingFaceEmbeddings:
26
  """Loads embedding model with specified device and normalization."""
27
  return langchain_huggingface.HuggingFaceEmbeddings(
 
70
 
71
 
72
  @spaces.GPU
73
+ def reply(message: str, history: list[str]) -> str:
74
  """
75
  Generates a response to the user’s message.
76
  """
77
  # Preprocess message
78
 
79
+ pipe = transformers.pipeline(
80
+ "text-generation", model="Qwen/Qwen2.5-7B-Instruct", device="cuda"
81
+ )
82
 
83
  message = preprocess(message, PUBLICATIONS_TO_RETRIEVE)
84
+ return pipe(message, max_new_tokens=512)[0]["generated_text"]
85
 
86
 
87
  # Example Queries for Interface