ray committed on
Commit
7a9ec21
·
1 Parent(s): 9021b39
Files changed (3) hide show
  1. app.py +3 -2
  2. chat_template.py +3 -1
  3. custom_io.py +15 -2
app.py CHANGED
@@ -12,7 +12,7 @@ from llama_index.vector_stores.qdrant import QdrantVectorStore
12
  from llama_index.text_splitter import SentenceSplitter
13
  from llama_index.extractors import TitleExtractor
14
  from llama_index.ingestion import IngestionPipeline
15
- from chat_template import CHAT_TEXT_QA_PROMPT
16
  from schemas import ChatbotVersion, ServiceProvider
17
  from chatbot import Chatbot, IndexBuilder
18
  from custom_io import MarkdownReader, UnstructuredReader, default_file_metadata_func
@@ -29,7 +29,7 @@ llama_index.set_global_handler("arize_phoenix")
29
  openai.api_key = os.getenv("OPENAI_API_KEY")
30
 
31
  IS_LOAD_FROM_VECTOR_STORE = True
32
- VDB_COLLECTION_NAME = "demo-v1"
33
  MODEL_NAME = ChatbotVersion.CHATGPT_4.value
34
 
35
 
@@ -151,6 +151,7 @@ class AweSumCareContextChatbot(AwesumCareToolChatbot):
151
  self.chat_engine = self.index.as_chat_engine(
152
  chat_mode=ChatMode.CONTEXT,
153
  similarity_top_k=5,
 
154
  text_qa_template=CHAT_TEXT_QA_PROMPT)
155
 
156
  class AweSumCareSimpleChatbot(AwesumCareToolChatbot):
 
12
  from llama_index.text_splitter import SentenceSplitter
13
  from llama_index.extractors import TitleExtractor
14
  from llama_index.ingestion import IngestionPipeline
15
+ from chat_template import CHAT_TEXT_QA_PROMPT, TEXT_QA_SYSTEM_PROMPT
16
  from schemas import ChatbotVersion, ServiceProvider
17
  from chatbot import Chatbot, IndexBuilder
18
  from custom_io import MarkdownReader, UnstructuredReader, default_file_metadata_func
 
29
  openai.api_key = os.getenv("OPENAI_API_KEY")
30
 
31
  IS_LOAD_FROM_VECTOR_STORE = True
32
+ VDB_COLLECTION_NAME = "demo-v2"
33
  MODEL_NAME = ChatbotVersion.CHATGPT_4.value
34
 
35
 
 
151
  self.chat_engine = self.index.as_chat_engine(
152
  chat_mode=ChatMode.CONTEXT,
153
  similarity_top_k=5,
154
+ system_prompt=TEXT_QA_SYSTEM_PROMPT.content,
155
  text_qa_template=CHAT_TEXT_QA_PROMPT)
156
 
157
  class AweSumCareSimpleChatbot(AwesumCareToolChatbot):
chat_template.py CHANGED
@@ -8,7 +8,9 @@ TEXT_QA_SYSTEM_PROMPT = ChatMessage(
8
  "detailed information on legal and medical documents like '平安紙', '持久授權書', and '預設醫療指示'.\n"
9
  "Always answer queries using the context information provided, focusing on delivering "
10
  "accurate, comprehensive, and user-friendly responses.\n"
11
- "當用戶用繁體中文時,請盡量使用繁體中文作答。"
 
 
12
  ),
13
  role=MessageRole.SYSTEM,
14
  )
 
8
  "detailed information on legal and medical documents like '平安紙', '持久授權書', and '預設醫療指示'.\n"
9
  "Always answer queries using the context information provided, focusing on delivering "
10
  "accurate, comprehensive, and user-friendly responses.\n"
11
+ "任何與安心三寶無關的問題, "
12
+ "please simply say: 很抱歉,身為安心三寶人工智能,我無法回答與安心三寶無關的內容。\n"
13
+ "當用戶用繁體中文時,使用繁體中文作答。"
14
  ),
15
  role=MessageRole.SYSTEM,
16
  )
custom_io.py CHANGED
@@ -8,6 +8,7 @@ from datetime import datetime
8
  import mimetypes
9
  import os
10
  from pathlib import Path
 
11
  from typing import Any, Dict, List, Optional
12
 
13
  from llama_index.readers.base import BaseReader
@@ -80,9 +81,10 @@ def parse_knowledge_units(file_path):
80
 
81
  knowledge_units = []
82
  current_unit = ""
83
-
84
  for line in lines:
85
- if line.strip() and line[0].isdigit() and '.' in line:
 
86
  if current_unit:
87
  knowledge_units.append(current_unit.strip())
88
  current_unit = ""
@@ -92,6 +94,17 @@ def parse_knowledge_units(file_path):
92
 
93
  if current_unit:
94
  knowledge_units.append(current_unit.strip())
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  return knowledge_units
97
 
 
8
  import mimetypes
9
  import os
10
  from pathlib import Path
11
+ import re
12
  from typing import Any, Dict, List, Optional
13
 
14
  from llama_index.readers.base import BaseReader
 
81
 
82
  knowledge_units = []
83
  current_unit = ""
84
+ unit_start_pattern = re.compile(r'^\d+\.\s')
85
  for line in lines:
86
+ stripped_line = line.strip()
87
+ if unit_start_pattern.match(stripped_line):
88
  if current_unit:
89
  knowledge_units.append(current_unit.strip())
90
  current_unit = ""
 
94
 
95
  if current_unit:
96
  knowledge_units.append(current_unit.strip())
97
+ # for line in lines:
98
+ # if line.strip() and line[0].isdigit() and '.' in line:
99
+ # if current_unit:
100
+ # knowledge_units.append(current_unit.strip())
101
+ # current_unit = ""
102
+ # current_unit += line
103
+ # else:
104
+ # current_unit += line
105
+
106
+ # if current_unit:
107
+ # knowledge_units.append(current_unit.strip())
108
 
109
  return knowledge_units
110