added memory
backend.py  (+9, -32)
@@ -13,8 +13,10 @@ from llama_cpp import Llama
 import spaces
 from huggingface_hub import login
 from llama_index.core.memory import ChatMemoryBuffer
-from typing import Iterator
+from typing import Iterator, List
 from llama_index.core.chat_engine import CondensePlusContextChatEngine
+from llama_index.core.llms import ChatMessage, MessageRole
+



@@ -66,39 +68,16 @@ def handle_query(query_str: str,

     memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

-
-    """template = (
-        "Sei un assistente Q&A italiano di nome Odi, che risponde solo alle domande o richieste pertinenti in modo preciso. \n"
-        "---------------------\n"
-        "{context_str}"
-        "\n---------------------\n"
-        "Usa la cronologia delle chat precedenti, o il contesto sopra, per interagire e aiutare l'utente a rispondere alla domanda: {query_str}\n"
-    )
-    system_prompt_template = PromptTemplate(template)
-
-
-    system_message_content = system_prompt_template.format(context_str="Il vino è più buono a 30 gradi", query_str=query_str) #
-
-    chat_engine = CondensePlusContextChatEngine.from_defaults(
-        index.as_retriever(),
-        memory=memory,
-        #llm=GemmaLLMInterface(),
-        system_prompt=(
-            system_message_content
-        ),
-        verbose=True,
-    )"""
-
-
-    conversation = []
+    conversation: List[ChatMessage] = []
     for user, assistant in chat_history:
         conversation.extend(
             [
-
-
+                ChatMessage(role=MessageRole.USER, content=user),
+
+                ChatMessage(role=MessageRole.ASSISTANT, content=assistant),
             ]
         )
-    conversation.append(
+    conversation.append( ChatMessage(role=MessageRole.USER, content=query_str))

     chat_engine = index.as_chat_engine(
         chat_mode="condense_plus_context",
@@ -119,10 +98,8 @@ def handle_query(query_str: str,
     )


-
-
     outputs = []
-    response = chat_engine.stream_chat(conversation)
+    response = chat_engine.stream_chat(query_str, conversation)
     #response = chat_engine.chat(query_str)
     for token in response.response_gen:
         #if not token.startswith("system:") and not token.startswith("user:"):