gufett0 committed
Commit 91d2747 · Parent: 9a196a8

switched to chat engine

Files changed (2):
  1. backend.py +12 -47
  2. interface.py +3 -4
backend.py CHANGED
@@ -55,62 +55,27 @@ def build_index():
 
 @spaces.GPU(duration=20)
 def handle_query(query_str, chathistory):
-
     index = build_index()
-
-    qa_prompt_str = (
-        "Context information is below.\n"
-        "---------------------\n"
-        "{context_str}\n"
-        "---------------------\n"
-        "Given the context information and not prior knowledge, "
-        "answer the question: {query_str}\n"
-    )
-
-    # Text QA Prompt
-    chat_text_qa_msgs = [
-        (
-            "system",
-            "Sei un assistente italiano di nome Ossy che risponde solo alle domande o richieste pertinenti. ",
-        ),
-        ("user", qa_prompt_str),
-    ]
-    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
-
-    try:
-        # Create a streaming query engine
-        """query_engine = index.as_query_engine(text_qa_template=text_qa_template, streaming=False, similarity_top_k=1)
-
-        # Execute the query
-        streaming_response = query_engine.query(query_str)
-
-        r = streaming_response.response
-        cleaned_result = r.replace("<end_of_turn>", "").strip()
-        yield cleaned_result"""
-
-        # Stream the response
-        """outputs = []
-        for text in streaming_response.response_gen:
-
-            outputs.append(str(text))
-            yield "".join(outputs)"""
-
-        memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
-        chat_engine = index.as_chat_engine(
+
+    memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
+    chat_engine = index.as_chat_engine(
         chat_mode="context",
         memory=memory,
         system_prompt=(
             "Sei un assistente italiano di nome Ossy che risponde solo alle domande o richieste pertinenti. "
-        ),
-    )
-
+        ),
+    )
+
+    try:
         response = chat_engine.stream_chat(query_str)
         for token in response.response_gen:
-            yield token
-
-
+            if token.strip():  # Only yield non-empty tokens
+                yield token
     except Exception as e:
         yield f"Error processing query: {str(e)}"
+    finally:
+        # You might want to add any cleanup code here
+        pass
 
 
 
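For reference, this is roughly the pattern the new handle_query relies on: LlamaIndex's "context" chat engine retrieves index context for every turn and keeps a short rolling history in a ChatMemoryBuffer. A minimal sketch follows, assuming build_index() returns a ready index and an LLM is already configured; the stream_answer wrapper is hypothetical and this is not the repository's exact code.

    from llama_index.core.memory import ChatMemoryBuffer

    def stream_answer(index, query_str):
        # Keep roughly the last 1500 tokens of conversation as memory.
        memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
        chat_engine = index.as_chat_engine(
            chat_mode="context",   # retrieve index context for each message
            memory=memory,
            system_prompt="You are an Italian assistant named Ossy who only answers relevant questions.",
        )
        # stream_chat returns a response whose response_gen yields tokens incrementally.
        response = chat_engine.stream_chat(query_str)
        for token in response.response_gen:
            if token.strip():  # skip whitespace-only tokens
                yield token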
 
interface.py CHANGED
@@ -68,9 +68,8 @@ class GemmaLLMInterface(CustomLLM):
     @llm_completion_callback()
     def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
         streamer, generate_kwargs = self._prepare_generation(prompt)
-
-        t = Thread(target=self.model.generate, kwargs=generate_kwargs)
-        t.start()
-
+
+        self.model.generate(**generate_kwargs)  # Run synchronously.
+
         for new_token in streamer:
             yield CompletionResponse(text=new_token)
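For context, stream_complete assumes _prepare_generation wires up something like a transformers TextIteratorStreamer that model.generate fills with decoded tokens. A minimal sketch of that helper follows; its exact contents are not shown in this commit, so the tokenizer handling and generation parameters here are assumptions.

    from transformers import TextIteratorStreamer

    def _prepare_generation(self, prompt):
        # Hypothetical sketch: encode the prompt and build a token streamer.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        streamer = TextIteratorStreamer(
            self.tokenizer, skip_prompt=True, skip_special_tokens=True
        )
        generate_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=512)
        return streamer, generate_kwargs

Note that with the threaded call removed, model.generate(**generate_kwargs) only returns once generation has finished, so the loop over the streamer drains already-buffered tokens rather than emitting them as they are produced.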