Spaces: Running on Zero
StevenChen16 committed
Commit 5872c96
1 Parent(s): 1df3e06
Fix bug and remove the duplicate initialization of vector_store
app.py CHANGED

@@ -94,7 +94,7 @@ try:
 
 except Exception as e:
     raise RuntimeError(f"Failed to load vector store from HuggingFace Hub: {str(e)}")
-
+vector_store = FAISS.load_local(repo_path, embedding_model, allow_dangerous_deserialization=True)
 
 
 background_prompt = '''
@@ -155,19 +155,14 @@ Now, please guide me step by step to describe the legal issues I am facing, acco
 
 def query_vector_store(vector_store: FAISS, query, k=4, relevance_threshold=0.8):
     """
-
-    Parameters:
-    vector_store (FAISS): the vector store instance
-    query (str): the query text
-    k (int): the number of documents to return
-    relevance_threshold (float): the relevance threshold
-    Returns:
-    context (list): the retrieved context
+    Query similar documents from vector store.
     """
-    retriever = vector_store.as_retriever(search_type="similarity_score_threshold",
+    retriever = vector_store.as_retriever(search_type="similarity_score_threshold",
+                                          search_kwargs={"score_threshold": relevance_threshold, "k": k})
     similar_docs = retriever.invoke(query)
     context = [doc.page_content for doc in similar_docs]
-
+    # Join the context list into a single string
+    return " ".join(context) if context else ""
 
 @spaces.GPU(duration=120)
 def chat_llama3_8b(message: str,
@@ -177,40 +172,39 @@ def chat_llama3_8b(message: str,
                    ) -> str:
     """
     Generate a streaming response using the llama3-8b model.
-    Args:
-        message (str): The input message.
-        history (list): The conversation history used by ChatInterface.
-        temperature (float): The temperature for generating the response.
-        max_new_tokens (int): The maximum number of new tokens to generate.
-    Returns:
-        str: The generated response.
     """
+    # Get citations from vector store
     citation = query_vector_store(vector_store, message, 4, 0.7)
-
-
+
+    # Build conversation history
     conversation = []
     for user, assistant in history:
-
-
-
-
+        conversation.extend([
+            {"role": "user", "content": user},
+            {"role": "assistant", "content": assistant}
+        ])
+
+    # Construct the final message with background prompt and citations
+    if citation:
+        message = f"{background_prompt}Based on these citations: {citation}\nPlease answer question: {message}"
     else:
-        message = background_prompt
+        message = f"{background_prompt}{message}"
+
     conversation.append({"role": "user", "content": message})
 
+    # Generate response
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
-
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
 
     generate_kwargs = dict(
-        input_ids=
+        input_ids=input_ids,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
         eos_token_id=terminators,
     )
-
+
     if temperature == 0:
         generate_kwargs['do_sample'] = False
 
@@ -220,7 +214,6 @@ def chat_llama3_8b(message: str,
     outputs = []
     for text in streamer:
         outputs.append(text)
-        #print(outputs)
         yield "".join(outputs)
 
 
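Note (not part of the commit): a minimal sketch of how the reworked query_vector_store() behaves after this change, assuming the langchain_community FAISS and HuggingFaceEmbeddings APIs the Space already relies on. The index path "./faiss_index" and the MiniLM embedding model name below are placeholder assumptions, not values taken from app.py.

# Sketch only: placeholder index path and embedding model name.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

def query_vector_store(vector_store: FAISS, query, k=4, relevance_threshold=0.8):
    """Query similar documents from vector store (as in the new revision)."""
    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": relevance_threshold, "k": k},
    )
    similar_docs = retriever.invoke(query)
    context = [doc.page_content for doc in similar_docs]
    # Key change in this commit: return one joined string, or "" when no
    # document clears the relevance threshold, instead of returning a list.
    return " ".join(context) if context else ""

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # placeholder model
vector_store = FAISS.load_local("./faiss_index", embedding_model,
                                allow_dangerous_deserialization=True)  # placeholder path
citation = query_vector_store(vector_store, "What does a non-compete clause require?", 4, 0.7)
if citation:
    print("Based on these citations:", citation)
else:
    print("No sufficiently relevant documents found.")

The empty-string fallback is what lets the updated chat_llama3_8b branch on "if citation:" and prepend the citations to the user message only when something relevant was retrieved.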