Paul-Louis Pröve committed
Commit 228a2e7
1 Parent(s): de62d3e

doc reference, multi language

Files changed (3)
  1. app.py +73 -59
  2. sys_prompt.txt +1 -0
  3. translate_prompt.txt +5 -0
app.py CHANGED
@@ -27,20 +27,18 @@ makes = df["make"].unique().to_list()
 models = df["model"].unique().to_list()
 
 with open("sys_prompt.txt", "r") as f:
-    prompt = f.read()
-
-
-def embed(message):
-    return embedder.encode([message])[0]
+    sys_prompt = f.read()
 
+with open("translate_prompt.txt", "r") as f:
+    translate_prompt = f.read()
 
 # llm = AzureChatOpenAI(deployment_name="chatserver35turbo")
 embedder = SentenceTransformer("BAAI/bge-small-en")
-search = AzureSearch(
+vector_store = AzureSearch(
     azure_search_endpoint=vector_store_address,
     azure_search_key=vector_store_password,
     index_name=index_name,
-    embedding_function=embed,
+    embedding_function=lambda x: embedder.encode([x])[0],
 )
 
 
@@ -57,21 +55,39 @@ def filter_models(year, make):
     return gr.Dropdown.update(choices=choices, interactive=True)
 
 
-def search_db(query, year, make, model, k=5, s_type="similarity"):
-    filters = f"year eq {year} and make eq '{make}' and model eq '{model}'"
+def gpt(history, prompt, temp=0.0, stream=True):
+    hist = [{"role": "system", "content": prompt}]
+    for user, bot in history:
+        hist += [{"role": "user", "content": user}]
+        if bot:
+            hist += [{"role": "assistant", "content": bot}]
+    return openai.ChatCompletion.create(
+        deployment_id="chatserver35turbo16k",
+        messages=hist,
+        temperature=temp,
+        stream=stream,
+    )
+
+
+def user(message, history):
+    # Necessary to clear input and display message
+    return "", history + [[message, None]]
 
-    res = []
-    if search_type == "hybrid":
-        res = search.similarity_search(query, k, search_type=s_type, filters=filters)
-    else:
-        mult = 1
-        while len(res) < k or mult <= 16:
-            res = search.similarity_search(
-                query, 100 * mult, search_type=s_type, filters=filters
-            )
-            mult *= 2
-        res = res[:k]
 
+def search(history, results, year, make, model):
+    if results:
+        # If results already exist, don't search again
+        return history, results
+
+    query = gpt(history, translate_prompt, stream=False)["choices"][0]["message"][
+        "content"
+    ]
+    print(query)
+
+    filters = f"year eq {year} and make eq '{make}' and model eq '{model}'"
+    res = vector_store.similarity_search(
+        query, 5, search_type="hybrid", filters=filters
+    )
     results = []
     for r in res:
         results.append(
@@ -80,55 +96,53 @@ def search_db(query, year, make, model, k=5, s_type="similarity"):
             "content": r.page_content,
         }
     )
-    return str(results)
-
-
-def respond(message, history, year, make, model, search_type):
-    if not year or not make or not model:
-        msg = "Please select a year, make, and model."
-        # return msg
-        for i in range(len(msg)):
-            time.sleep(0.02)
-            yield msg[: i + 1]
-    else:
-        results = search_db(message, year, make, model, k=5, s_type=search_type)
-
-        hist = []
-        hist.append(
-            {
-                "role": "system",
-                "content": prompt + results,
-            }
-        )
-        hist.append(
-            {
-                "role": "user",
-                "content": f"Year: {year}\nMake: {make}\nModel: {model}\n\n{message}",
-            }
-        )
-        model = "chatserver35turbo16k"
-        res = openai.ChatCompletion.create(
-            deployment_id=model, messages=hist, temperature=0.0, stream=True
-        )
-        msg = ""
-        # return str(res["choices"][0]["message"]["content"])
-        for chunk in res:
-            if "content" in chunk["choices"][0]["delta"]:
-                msg = msg + chunk["choices"][0]["delta"]["content"]
-                yield msg
+    return history, results
+
+
+def bot(history, results):
+    res = gpt(history, sys_prompt + str(results))
+    history[-1][1] = ""
+    for chunk in res:
+        if "content" in chunk["choices"][0]["delta"]:
+            history[-1][1] = history[-1][1] + chunk["choices"][0]["delta"]["content"]
+            yield history
 
 
 with gr.Blocks(
-    css="footer {visibility: hidden} #component-8 {height: 75vh !important} #component-9 {height: 70vh !important}"
+    css="footer {visibility: hidden} #docs {height: 600px; overflow: auto !important}"
 ) as app:
     with gr.Row():
         year = gr.Dropdown(years, label="Year")
         make = gr.Dropdown([], label="Make", interactive=False)
         model = gr.Dropdown([], label="Model", interactive=False)
-    types = ["similarity", "hybrid"]
-    search_type = gr.Dropdown(types, label="Search Type", value="hybrid")
     year.change(filter_makes, year, make)
     make.change(filter_models, [year, make], model)
-    row = [year, make, model, search_type]
-    gr.ChatInterface(respond, additional_inputs=row).queue()
+    with gr.Row():
+        with gr.Column(scale=0.3333):
+            results = []
+            text = gr.JSON(None, language="json", interactive=False, elem_id="docs")
+        with gr.Column(scale=0.6667):
+            chatbot = gr.Chatbot(height=462)
+            with gr.Row():
+                msg = gr.Textbox(show_label=False, scale=7)
+                msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                    search,
+                    [chatbot, text, year, make, model],
+                    [chatbot, text],
+                    queue=False,
+                ).then(bot, [chatbot, text], chatbot)
+                btn = gr.Button("Send", variant="primary")
+                btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                    search,
+                    [chatbot, text, year, make, model],
+                    [chatbot, text],
+                    queue=False,
+                ).then(bot, [chatbot, text], chatbot)
+            with gr.Row():
+                gr.Button("Clear").click(
+                    lambda x, y: ([], None), [chatbot, text], [chatbot, text]
+                )
+                gr.Button("Undo").click(lambda x: (x[:-1]), [chatbot], [chatbot])
+
 app.queue().launch(auth=("motor", "vectorsearch"))
+# app.queue().launch()
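Note on the UI rewiring above: the commit replaces gr.ChatInterface with explicit chained events, so msg.submit / btn.click first run user (echo the message, clear the textbox), then search (fill the documents panel once per conversation), then bot (stream the answer). The following is a minimal, self-contained sketch of that pattern, not code from the commit: the stub handlers, stub document, and timings are invented stand-ins for the Azure Search and OpenAI calls.

import time
import gradio as gr

def user(message, history):
    # Echo the message into the chat and clear the textbox, as in app.py
    return "", history + [[message, None]]

def stub_search(history, results):
    # Stand-in for retrieval: only runs once per conversation, like search()
    if results:
        return history, results
    return history, [{"title": "placeholder doc", "content": "..."}]

def stub_bot(history, results):
    # Stream a canned reply word by word into the last chat turn, like bot()
    history[-1][1] = ""
    for word in f"Answering from {len(results)} stub document(s).".split():
        history[-1][1] += word + " "
        time.sleep(0.05)
        yield history

with gr.Blocks() as demo:
    docs = gr.JSON(None)
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    # queue=False on the first two steps makes the echo and panel update immediate;
    # the final generator step streams through the queue
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        stub_search, [chatbot, docs], [chatbot, docs], queue=False
    ).then(stub_bot, [chatbot, docs], chatbot)

demo.queue().launch()

Chaining with .then() instead of gr.ChatInterface is what allows the retrieved documents to be shown in their own panel while the answer streams into the chat.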
sys_prompt.txt CHANGED
@@ -4,6 +4,7 @@ You only and exclusively use the documents as a source of information.
 If the documents don't provide the answer or are empty, simply say so.
 Use only those documents that are strictly relevant to the query.
 Structure your answer step by step if it fits the query.
+Answer in the language the question or query is asked in.
 Include a list of relevant document titles in the end of your response.
 
 Documents:
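A hedged illustration of how the amended prompt is consumed: in app.py's bot(), the retrieved documents are appended to the prompt text via sys_prompt + str(results), so the model sees the instructions (including the new language rule) followed by the stringified document list. The sample result below is invented.

# Illustration only; the sample document is invented.
with open("sys_prompt.txt", "r") as f:
    sys_prompt = f.read()

results = [
    {"title": "Engine oil and filter", "content": "Drain and refill procedure ..."},
]

# Mirrors bot() in app.py: instructions first, then the stringified results.
system_message = sys_prompt + str(results)
print(system_message)

With the added instruction, a question asked in German is answered in German even though the retrieved documents stay in English.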
translate_prompt.txt ADDED
@@ -0,0 +1,5 @@
+You are a professional translator.
+Any text that the user sends, you translate to English.
+If the text already is in English, just return the original text.
+Do not add remarks, comments, confirmations or acknoledgements.
+Simply return the English text.
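This prompt is consumed by the new search() in app.py, which calls gpt(history, translate_prompt, stream=False) to turn the (possibly non-English) conversation into an English search query before hitting the index. Below is a standalone sketch of that call; it assumes the legacy openai<1.0 SDK with Azure credentials configured as elsewhere in the app, and the German question and printed output are purely illustrative.

import openai  # legacy openai<1.0 SDK; Azure api_type/base/key assumed to be set elsewhere

with open("translate_prompt.txt", "r") as f:
    translate_prompt = f.read()

history = [["Wie wechsle ich das Motoröl?", None]]  # illustrative German question

messages = [{"role": "system", "content": translate_prompt}]
for user_msg, bot_msg in history:
    messages.append({"role": "user", "content": user_msg})
    if bot_msg:
        messages.append({"role": "assistant", "content": bot_msg})

response = openai.ChatCompletion.create(
    deployment_id="chatserver35turbo16k",  # deployment name taken from app.py
    messages=messages,
    temperature=0.0,
)
query = response["choices"][0]["message"]["content"]
print(query)  # expected English query, e.g. "How do I change the engine oil?"

Because the index is queried with an English embedder (BAAI/bge-small-en), translating the query first keeps the hybrid search usable for non-English users, while the sys_prompt.txt change lets the final answer come back in the user's language.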