thanhtung09t2 committed on
Commit
71968a5
1 Parent(s): e57bf30

Create engine.py

Browse files
Files changed (1) hide show
  1. api/engine.py +60 -0
api/engine.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # project-local and third-party packages
2
+ from api.llms.base import get_LLM
3
+ from api.embedding_models.base import get_embedding_model
4
+ from api.vector_index.base import get_vector_index
5
+ from llama_index.core import Settings
6
+ from llama_index.core.memory import ChatMemoryBuffer
7
+
8
# Passed as ``response_mode`` when the query and chat engines are built.
QUERY_ENGINE_MODE = "tree_summarize"

# Passed as ``chat_mode`` to ``as_chat_engine``.
CHAT_ENGINE_MODE = "context"

# Passed as ``similarity_top_k`` to both engines.
TOP_K = 3

# Passed as ``token_limit`` to ``ChatMemoryBuffer.from_defaults``.
MEMORY_TOKEN_LIMIT = 8000
12
+
13
class QueryEngine:
    """Question answering on top of the configured vector index.

    Wraps the object returned by the index's ``as_query_engine`` and exposes
    it through ``query`` / ``query_streaming``.
    """

    def __init__(self,
                 embedding_model="BAAI/bge-m3",
                 llm="aya:8b",
                 vector_index="chroma",
                 force_new_db=False):
        """Resolve the embedding model, LLM and index, then build the engine.

        Args:
            embedding_model: Identifier handed to ``get_embedding_model``.
            llm: Identifier handed to ``get_LLM``.
            vector_index: Identifier handed to ``get_vector_index``.
            force_new_db: Forwarded unchanged to ``get_vector_index``.
        """
        # Keep the embedding -> LLM -> index call order.
        # NOTE(review): presumably these helpers also register their models
        # on llama_index ``Settings`` -- confirm in the helper modules.
        self.embed_config = get_embedding_model(embedding_model)
        self.llm_config = get_LLM(llm)
        self.index = get_vector_index(vector_index, force_new_db)

        build_engine = self.index.as_query_engine
        engine_options = {
            "text_qa_template": self.llm_config.query_context_template,
            "response_mode": QUERY_ENGINE_MODE,
            "similarity_top_k": TOP_K,
            "streaming": True,
        }
        self.engine = build_engine(**engine_options)

    def query(self, user_input):
        """Send ``user_input`` to the engine and return whatever it returns.

        NOTE(review): the engine is built with ``streaming=True`` and this
        method makes the same call as ``query_streaming`` -- confirm whether
        a non-streaming result was intended here.
        """
        answer = self.engine.query(user_input)
        return answer

    def query_streaming(self, user_input):
        """Send ``user_input`` to the engine built with ``streaming=True``.

        Returns the engine's ``query`` result unchanged.
        """
        streamed = self.engine.query(user_input)
        return streamed
34
+
35
class ChatEngine:
    """Conversational interface on top of the configured vector index.

    Wraps the object returned by the index's ``as_chat_engine`` and gives it
    a ``ChatMemoryBuffer`` created once per instance.
    """

    def __init__(self,
                 embedding_model="BAAI/bge-m3",
                 llm="gpt4o_mini",
                 vector_index="chroma",
                 force_new_db=False):
        """Resolve the embedding model, LLM and index, then build the engine.

        Args:
            embedding_model: Identifier handed to ``get_embedding_model``.
            llm: Identifier handed to ``get_LLM``.
            vector_index: Identifier handed to ``get_vector_index``.
            force_new_db: Forwarded unchanged to ``get_vector_index``.
        """
        # Keep the embedding -> LLM -> index call order.
        self.embed_config = get_embedding_model(embedding_model)
        self.llm_config = get_LLM(llm)
        self.index = get_vector_index(vector_index, force_new_db)

        build_engine = self.index.as_chat_engine
        engine_options = {
            # The LLM comes from the global Settings, not from llm_config.
            # NOTE(review): presumably get_LLM configures Settings.llm --
            # confirm in api.llms.base.
            "llm": Settings.llm,
            "chat_mode": CHAT_ENGINE_MODE,
            "verbose": False,
            "memory": ChatMemoryBuffer.from_defaults(
                token_limit=MEMORY_TOKEN_LIMIT
            ),
            "system_prompt": self.llm_config.system_prompt,
            "context_template": self.llm_config.chat_context_template,
            "response_mode": QUERY_ENGINE_MODE,
            "similarity_top_k": TOP_K,
            "streaming": True,
        }
        self.engine = build_engine(**engine_options)

    def query(self, user_input):
        """Return the result of the engine's ``chat`` for ``user_input``."""
        reply = self.engine.chat(user_input)
        return reply

    def query_streaming(self, user_input):
        """Return the result of the engine's ``stream_chat`` for ``user_input``."""
        streamed_reply = self.engine.stream_chat(user_input)
        return streamed_reply