Rams901 committed
Commit 4174fb4 (1 parent: c40a87c)

Update app.py

Files changed (1): app.py (+106 -101)
app.py CHANGED
@@ -24,7 +24,9 @@ import ast
 from utils import ClaudeLLM, ClaudeLLM2, extract_website_name, remove_numbers
 
 embeddings = HuggingFaceEmbeddings()
-db = FAISS.load_local('db_full', embeddings)
+
+db_art = FAISS.load_local('db_art', embeddings)
+db_yt = FAISS.load_local('db_yt', embeddings)
 
 mp_docs = {}
 llm_4 = ChatOpenAI(
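The hunk above swaps the single `db_full` index for two per-media FAISS stores. For orientation, here is a minimal sketch of the LangChain FAISS round-trip this file relies on; `FAISS.from_texts` and `save_local` come from the standard LangChain API of this era rather than from the commit itself, and the sample text and metadata are placeholders.

```python
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings()

# Build and persist one index per media type (placeholder content).
art_index = FAISS.from_texts(
    ["an article chunk"], embeddings,
    metadatas=[{"title": "t", "url": "u", "id": 0}],
)
art_index.save_local("db_art")

# Load it back exactly as app.py does, then query with distance scores.
db_art = FAISS.load_local("db_art", embeddings)
docs_with_score = db_art.similarity_search_with_score("some query", k=1)
for doc, score in docs_with_score:
    print(score, doc.metadata)
```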
@@ -32,6 +34,7 @@ llm_4 = ChatOpenAI(
     model='gpt-4'
 )
 claude = ClaudeLLM()
+claude2 = ClaudeLLM2()
 def add_text(history, text):
 
     print(history)
@@ -39,7 +42,12 @@ def add_text(history, text):
 
     return history, ""
 
-def retrieve_thoughts(query, ):
+def retrieve_thoughts(query, media):
+    if media[0] == "Articles":
+        db = db_art
+    else:
+        db = db_yt
+
     # print(db.similarity_search_with_score(query = query, k = k, fetch_k = k*10))
     docs_with_score = db.similarity_search_with_score(query = query, k = 1500, fetch_k = len(db.index_to_docstore_id.values()))
     df = pd.DataFrame([dict(doc[0])['metadata'] for doc in docs_with_score], )
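Note that `retrieve_thoughts` now routes on `media[0]`, which raises an `IndexError` when the CheckboxGroup comes back empty and silently sends any non-"Articles" selection (including "Podcasts") to the YouTube index. A defensive sketch of the same dispatch, using a hypothetical `pick_db` helper that is not part of the commit:

```python
# Dict dispatch avoids the IndexError on an empty selection and makes the
# "Podcasts" fallthrough explicit. Index names are the ones in this commit.
MEDIA_TO_DB = {"Articles": "db_art", "Youtube": "db_yt"}

def pick_db(media):
    if not media:                   # nothing ticked in the UI
        raise ValueError("Select at least one media type.")
    key = media[0]
    if key not in MEDIA_TO_DB:      # e.g. "Podcasts", excluded for now
        raise ValueError(f"Unsupported media type: {key}")
    return MEDIA_TO_DB[key]
```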
@@ -48,19 +56,19 @@ def retrieve_thoughts(query, ):
 
     # TO-DO: What if user query doesn't match what we provide as documents
     # df.sort_values("score", inplace = True)
-
+
     tier_1 = df
     tier_2 = df[((df['score'] < 1) * (df["score"] > 0.8))]
 
     tier_1
-    chunks_1 = tier_1.groupby(['title', 'url', ]).apply(lambda x: "\n...\n".join(x.sort_values('score').iloc[:3].sort_values('id')['page_content'].values)).values
+    chunks_1 = tier_1.groupby(['title', 'url', ]).apply(lambda x: "\n...\n".join(x.sort_values('id')['page_content'].values)).values
+    print(len(chunks_1[0]))
     score = tier_1.groupby(['title', 'url', ]).apply(lambda x: x.sort_values('score').iloc[:3]['score'].mean()).values
 
-    tier_1_adjusted = tier_1.groupby(['title', 'url', ]).first().reset_index()[[ 'title', 'url', 'author']]
+    tier_1_adjusted = tier_1.groupby(['title', 'url', ]).first().reset_index()[[ 'title', 'url']]
     tier_1_adjusted['content'] = chunks_1
     tier_1_adjusted['score'] = score
 
-
     chunks_2 = tier_2.groupby(['title', 'url', ]).apply(lambda x: "\n...\n".join(x.sort_values('id')['page_content'].values)).values
     tier_2_adjusted = tier_2.groupby(['title', 'url', ]).first().reset_index()[[ 'title', 'url']]
     tier_2_adjusted['content'] = chunks_2
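The tiering one-liners are dense; this toy pandas sketch (placeholder data, not from the repo) reproduces the pattern after this commit: per `(title, url)` group, chunks are joined in `id` order, and the score is the mean of the three lowest (best) distances.

```python
import pandas as pd

df = pd.DataFrame({
    "title": ["A", "A", "B"], "url": ["a", "a", "b"],
    "id": [1, 0, 0], "score": [0.9, 0.4, 0.7],
    "page_content": ["second chunk", "first chunk", "only chunk"],
})

# Join chunks in id order per group, as the new chunks_1 line does.
chunks = df.groupby(["title", "url"]).apply(
    lambda x: "\n...\n".join(x.sort_values("id")["page_content"].values))
# Average the three best (lowest) distances per group.
score = df.groupby(["title", "url"]).apply(
    lambda x: x.sort_values("score").iloc[:3]["score"].mean())

# .first().reset_index() walks groups in the same sorted order, so the
# positional .values assignment lines up, exactly as in retrieve_thoughts.
adjusted = df.groupby(["title", "url"]).first().reset_index()[["title", "url"]]
adjusted["content"] = chunks.values
adjusted["score"] = score.values
print(adjusted)
```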
@@ -70,88 +78,85 @@ def retrieve_thoughts(query, ):
     tier_1_adjusted.sort_values("score", inplace = True)
     tier_1_adjusted['ref'] = range(1, len(tier_1_adjusted) + 1 )
 
-    return {'tier 1':tier_1_adjusted[:min(len(tier_1_adjusted), 100)], 'tier 2': tier_2_adjusted.loc[:5]}
+    return {'tier 1':tier_1_adjusted[:min(len(tier_1_adjusted), 30)], 'tier 2': tier_2_adjusted.loc[:5]}
 
-def get_references(query):
+def get_references(query, media):
     # TO-DO FINSIH UPP.
-    thoughts = retrieve_thoughts(query)
+    thoughts = retrieve_thoughts(query, media)
     print(thoughts.keys())
     tier_1 = thoughts['tier 1']
-    reference = tier_1[['ref', 'url', 'title', 'author']].to_dict('records')
+    reference = tier_1[['ref', 'url', 'title']].to_dict('records')
     return reference
-def qa_themes(query,):
-
-    docs = ""
-
-    global db
-    print(db)
-
-    global mp_docs
-    thoughts = retrieve_thoughts(query)
-    if not(thoughts):
 
-        if mp_docs:
-            thoughts = mp_docs
-        else:
-            mp_docs = thoughts
-
-    tier_1 = thoughts['tier 1']
-    tier_2 = thoughts['tier 2']
+def grab_jsons(query, media = None, tier_1 = None, ):
 
-    reference = tier_1[['ref', 'url', 'title', 'author']].to_dict('records')
+    response = ""
 
-    tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])}] title: {x['title']}\n author: {x.author}\n content: {x.content}", axis = 1).values)
-    tier_2 = list(tier_2.apply(lambda x: f"title: {x['title']}\n Content: {x.content}", axis = 1).values)
-    print(f"QUERY: {query}\nTIER 1: {tier_1}\nTIER2: {tier_2}")
+    if tier_1 is None:
+        thoughts = retrieve_thoughts(query, media)
+        tier_1 = thoughts['tier 1']
+        tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])}] title: {x['title']}\n Content: {x.content}", axis = 1).values)
 
-    # print(f"DOCS RETRIEVED: {mp_docs.values}")
-    # Cynthesis Generation
+    for i in range(3, len(tier_1), 3):
+        portion = tier_1[i - 3 :i]
+        response += '\n' + jsonify_articles(query, portion)
 
+    return response
 
-    # Themes
-    session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges. You will use do different analysis to the articles provided to me. Stay truthful and if you weren't provided any resources give your oppinion only."""
-    task = """Your primary responsibility is to identify multiple themes from the given articles. For each theme detected, you are to present it under three separate categories:
+def jsonify_articles(query, tier_1 = None):
+    if tier_1 is None:
+        thoughts = retrieve_thoughts(query)
+        tier_1 = thoughts['tier 1']
+        tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])}] title: {x['title']}\n Content: {x.content}", axis = 1).values)
 
-    1. Theme Title - An easy-to-understand title that encapsulates the core idea of the theme extracted from the article.
-
-    2. Theme Description - An expanded elaboration that explores the theme in detail based on the arguments and points provided in the article.
-
-    3. Quotes related to theme - Locate and provide at least one compelling quote from the article that directly supports or showcases the theme you have identified. This quote should serve as a specific evidence or example from the article text that corresponds directly to the developed theme.
-
+    # json
+    # {
+    #     'ref': 1,
+    #     'quotes': ['quote_1', 'quote_2', 'quote_3'],
+    #     'summary (optional for now as we already have summaries)': ""
+    # }
 
-    Keep your answer direct and don't include your thoughts. Make sure that the quote used should have a reference [1] related to the article."""
+    session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges. You will execute different analysis to the articles provided to you. Stay truthful and if you weren't provided any resources give your oppinion only."""
+    task = """Your primary responsibility is to identify valuable information from the given articles related to a given query.
+    For each article provided, you are to present it under four separate categories:
+    1. Article Reference - A reference for the article id: int
+    2. Article Title - The title for the article: string
+    3. Article quotes - Numerous Quotes extracted from the article that prove certain point of views in a list format [quote_1, quote_2, quote_3, quote_4, quote_5]
+    4. Article Summary - A summary for the article: string
 
+    Make sure to include all valuable quotes to be used later on.
+    Keep your answer direct and don't include your thoughts. Make sure that the quote used should have a reference [1] that identifies the source."""
 
     prompt = PromptTemplate(
-        input_variables=["query", "task", "session_prompt", "articles"],
+        input_variables=["query", "task", "articles"],
         template="""
-        You are a {session_prompt}
         {task}
+
+        The extracted information should correlate to the following query.
         query: {query}
         Articles:
         {articles}
 
-        The extracted themes should be written in structured manner, ensuring clarity and meaningful correlation between the themes and the articles. Don't forget to mention the reference in the quote. Avoid including personal opinions or making generalizations that are not explicitly supported by the articles.
+        The extracted information should be written in structured manner, ensuring clarity and meaningful format for the articles. Avoid including personal opinions or making generalizations that are not explicitly supported by the articles.
         Keep your answer direct and don't include your thoughts.
         """,
     )
 
-
-    # llm = BardLLM()
     chain = LLMChain(llm=claude, prompt = prompt)
 
-    themes = chain.run(query=query, articles="\n".join(tier_1), session_prompt = session_prompt, task = task)
-    return themes
+    json_articles = chain.run(query=query, articles="\n".join(tier_1), task = task).strip()
 
-def qa_retrieve(query,):
+    return json_articles
 
+def qa_retrieve(query, media):
+
     docs = ""
 
     global db
     print(db)
 
     global mp_docs
-    thoughts = retrieve_thoughts(query)
+    thoughts = retrieve_thoughts(query, media)
     if not(thoughts):
 
         if mp_docs:
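One caveat on the batching loop added in `grab_jsons`: because `range` stops short of `len(tier_1)`, the loop always drops the trailing one-to-three entries, and with three or fewer entries it processes nothing at all. A tail-safe sketch, using a hypothetical `batches` helper that is not in the commit:

```python
# Yields full batches plus the final partial batch, unlike
# range(3, len(items), 3), which never reaches the tail.
def batches(items, size=3):
    for start in range(0, len(items), size):
        yield items[start:start + size]

# Possible usage inside grab_jsons:
# response = "\n".join(jsonify_articles(query, b) for b in batches(tier_1))
```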
@@ -164,14 +169,14 @@ def qa_retrieve(query,):
 
     reference = tier_1[['ref', 'url', 'title']].to_dict('records')
 
-    tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])}] title: {x['title']}\n author: {x.author}\n content: {x.content}", axis = 1).values)
+    tier_1 = list(tier_1.apply(lambda x: f"ref: [{int(x['ref'])}]\ntitle: {x['title']}\n Content: {x.content}", axis = 1).values)
     tier_2 = list(tier_2.apply(lambda x: f"title: {x['title']}\n Content: {x.content}", axis = 1).values)
     print(f"QUERY: {query}\nTIER 1: {tier_1}\nTIER2: {tier_2}")
 
     # print(f"DOCS RETRIEVED: {mp_docs.values}")
     # Cynthesis Generation
 
-    session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges. You will use do different analysis to the articles provided to me. Stay truthful and if you weren't provided any resources give your oppinion only."""
+    session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges. You will use do different analysis to the articles provided to you. You will generate a rich synthesis with numerous arguments extracted from the sources given."""
     # task = """Create a coherent synthesis in which you use references to the id of articles provided and relevant to the query.
 
     # Follow the example structure:
@@ -184,41 +189,42 @@ def qa_retrieve(query,):
     # - Popular wine choices include Cabernet Sauvignon, Pinot Noir, Zinfandel, Malbec, Syrah, and Merlot [2].
     # Remember, the goal is to choose a wine that complements the cut of steak and not overwhelm or take away from the flavor of the meat [3]."
     # """
-
+    articles = grab_jsons(query, tier_1 = tier_1)
     prompt = PromptTemplate(
         input_variables=["query", "session_prompt", "articles"],
         template="""
         You are a {session_prompt}
-        Create a coherent well-structured synthesis in which you use references to the id of articles provided and relevant to the query.
+        Create a rich well-structured synthesis in which you use references to the id of articles provided and relevant to the query.
+        The Synthesis should include at least 500 words englobing all prespectives found in the sources.
 
-        Follow the example structure, references are not provided but are found in the answer:
-        User: What are the secondary effects of covid?
-        Cynthesis: \nSecondary effects of COVID-19, often referred to as \"Long COVID\", are a significant concern. These effects are not limited to the acute phase of the disease but persist well past the first month, affecting various organ systems and leading to adverse outcomes such as all-cause death and hospitalization [1]. \n\nOne of the most alarming secondary effects is the increased risk of cardiovascular diseases. Studies have shown a 1.6-fold increased risk of stroke and a 2-fold higher risk of acute coronary disease in individuals who had COVID-19 [2][3][8]. These risks were observed even in younger populations, with a mean age of 44, and were prevalent after 30 days post-infection [2][3]. \n\nAnother study found that the adverse outcomes of COVID-19 could persist up to the 2-year mark, with the toll of adverse sequelae being worst during the first year [3]. The study also highlighted that individuals with severe COVID-19, who were hospitalized, were more likely to be afflicted with protracted symptoms and new medical diagnoses [3]. \n\nHowever, it's important to note that the risks associated with Long COVID might be most significant in the first few weeks post-infection and fade away as time goes on [4][9]. For instance, the chance of developing pulmonary embolism was found to be 32 times higher in the first month after testing positive for COVID-19 [4]. \n\nMoreover, the number of excess deaths in the U.S., which would indicate fatal consequences of mild infections at a delay of months or years, dropped to zero in April, about two months after the end of the winter surge, and have stayed relatively low ever since [4]. This suggests that a second wave of deaths—a long-COVID wave—never seems to break [4]. \n\nIn conclusion, while the secondary effects of COVID-19 are significant and can persist for a long time, the most severe risks seem to occur in the first few weeks post-infection and then gradually decrease. However, the full extent of the long-term effects of COVID-19 is still unknown, and further research is needed to fully understand the ways and extent COVID-19 has affected us.",
+        Follow the example structure:
+        User: What are the secondary effects of covid?
+        Synthesis: \nSecondary effects of COVID-19, often referred to as \"Long COVID\", are a significant concern. These effects are not limited to the acute phase of the disease but persist well past the first month, affecting various organ systems and leading to adverse outcomes such as all-cause death and hospitalization [1]. \n\nOne of the most alarming secondary effects is the increased risk of cardiovascular diseases. Studies have shown a 1.6-fold increased risk of stroke and a 2-fold higher risk of acute coronary disease in individuals who had COVID-19 [2][3][8]. These risks were observed even in younger populations, with a mean age of 44, and were prevalent after 30 days post-infection [2][3]. \n\nAnother study found that the adverse outcomes of COVID-19 could persist up to the 2-year mark, with the toll of adverse sequelae being worst during the first year [3]. The study also highlighted that individuals with severe COVID-19, who were hospitalized, were more likely to be afflicted with protracted symptoms and new medical diagnoses [3]. \n\nHowever, it's important to note that the risks associated with Long COVID might be most significant in the first few weeks post-infection and fade away as time goes on [4][9]. For instance, the chance of developing pulmonary embolism was found to be 32 times higher in the first month after testing positive for COVID-19 [4]. \n\nMoreover, the number of excess deaths in the U.S., which would indicate fatal consequences of mild infections at a delay of months or years, dropped to zero in April, about two months after the end of the winter surge, and have stayed relatively low ever since [4]. This suggests that a second wave of deaths—a long-COVID wave—never seems to break [4]. \n\nIn conclusion, while the secondary effects of COVID-19 are significant and can persist for a long time, the most severe risks seem to occur in the first few weeks post-infection and then gradually decrease. However, the full extent of the long-term effects of COVID-19 is still unknown, and further research is needed to fully understand the ways and extent COVID-19 has affected us.",
 
         query: {query}
 
         Articles:
         {articles}
 
-        Make sure to quote the article used if the argument corresponds to the query.
-        Use careful reasoning and professional writing for the synthesis. No need to mention your interaction with articles.
+        Make sure to quote the article used if the argument corresponds to the query. Add as many arguments and point of views as possible as long as they have a reference.
+        Keep your answer direct and use careful reasoning and professional writing for the synthesis. No need to mention your interaction with articles.
         Remember not to mention articles used at the beginning of sentences, keep it cohesive and rich in text while referencing as much as possible of sources given.
         """,
     )
 
 
-    llm = ClaudeLLM2()
-    chain = LLMChain(llm=llm, prompt = prompt)
+    # llm = BardLLM()
+    chain = LLMChain(llm=claude2, prompt = prompt)
+
+    consensus = chain.run(query=query, articles=articles, session_prompt = session_prompt,)
+    consensus = consensus[consensus.index(':')+1:].strip()
 
-    consensus = chain.run(query=query, articles="\n".join(tier_1), session_prompt = session_prompt,).strip()
     if "In conclusion" in consensus:
         consensus = consensus[:consensus.index('In conclusion')]
-
-    consensus = consensus[consensus.index(':')+1:].strip()
-
-    intro = qa_intro(query, consensus, tier_1)
-    conclusion = qa_conclusion(query, consensus, tier_1)
-    cynthesis = intro + "\n\n" + consensus + conclusion
+    intro = qa_intro(query, consensus, articles)
+    conclusion = qa_conclusion(query, consensus, articles)
+    cynthesis = intro + "\n\n" + consensus + "\n\n" + conclusion
+
     # for i in range(1, len(tier_1)+1):
     #     response = response.replace(f'[{i}]', f"<span class='text-primary'>[{i}]</span>")
 
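The reordered post-processing now slices at the first ':' before trimming the conclusion, and `str.index` raises `ValueError` whenever the model reply contains no colon at all. A sketch of a safer equivalent, a hypothetical helper that is not part of the commit:

```python
# Drop a short "Synthesis:"-style label if one is present, then trim any
# trailing conclusion, without raising when the colon is missing.
def strip_preamble(text: str) -> str:
    head, sep, tail = text.partition(':')
    # Only treat the colon as a label separator if it sits in a short prefix.
    if sep and len(head) < 40:
        text = tail
    if "In conclusion" in text:
        text = text[:text.index("In conclusion")]
    return text.strip()
```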
@@ -226,41 +232,40 @@ def qa_retrieve(query,):
     # json_resp = {'cynthesis': response, 'questions': questions, 'reference': reference}
 
     return cynthesis
+
 def qa_intro(query, cynthesis, tier_1,):
-
+
 
     llm = ClaudeLLM()
     llm_4 = ChatOpenAI(
-        temperature=0.5,
+        temperature=0,
         model='gpt-3.5-turbo-16k'
     )
 
     session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges. You will use do different analysis to the articles provided to me. Stay truthful and if you weren't provided any resources give your oppinion only."""
 
     prompt = PromptTemplate(
-        input_variables=["query", "articles"],
+        input_variables=["query", "cynthesis", "articles"],
         template="""
-        Give me an introduction to the following topic. Consider this an abstract. And after finishing the introduction, pick one quote from the sources given below.
-        this is the desired structure:
-        Introduction: Introduction
-        Quote: "quote". [1] (Reference)
+        Give me an introduction to the following consensus without repeating how it starts. Consider this an abstract. And after finishing the introduction, pick one quote from the sources given below.
 
-        Follow the structure given.
         query: {query}
-
+        Here's the consensus: {cynthesis}
+
         We have the opportunity to give an introduction to this synthesis without repeating information found.
         Pick an opening quote from the sources given below\n
         ---------\n
         {articles}
         ---------\n
 
-        Don't forget that your job is to only provide an introduction, abstract part that introduces the synthesis and then pick one general quote from the sources given. Maintain the desired structure.""",
+        Don't forget that your job is to only provide an introduction, abstract part that introduces the synthesis without repeating it and then pick one general quote from the sources given.""",
     )
 
+
     # llm = BardLLM()
     chain = LLMChain(llm=llm_4, prompt = prompt)
 
-    intro = chain.run(query=query, articles="\n".join(tier_1[:15]))
+    intro = chain.run(query=query, articles=tier_1, cynthesis = cynthesis)
     return intro.strip()
 
 def qa_conclusion(query, cynthesis, tier_1,):
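The intro prompt previously declared `input_variables=["query", "articles"]` while the chain was never handed the consensus text; this hunk brings the declaration and a new `{cynthesis}` placeholder in line. A minimal check of that contract, assuming a LangChain version that validates templates at construction (older releases did so by default):

```python
from langchain import PromptTemplate

# Declared input_variables must match the {placeholders}; with validation
# enabled, a mismatch fails here rather than deep inside chain.run.
prompt = PromptTemplate(
    input_variables=["query", "cynthesis", "articles"],
    template="query: {query}\nHere's the consensus: {cynthesis}\n{articles}",
)
print(prompt.format(query="q", cynthesis="c", articles="a"))
```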
@@ -295,12 +300,12 @@ def qa_conclusion(query, cynthesis, tier_1,):
     # llm = BardLLM()
     chain = LLMChain(llm=llm_4, prompt = prompt)
 
-    conclusion = chain.run(query=query, articles="\n".join(tier_1[:15]), cynthesis = cynthesis)
+    conclusion = chain.run(query=query, articles=tier_1, cynthesis = cynthesis)
     return conclusion.strip()
-
-def qa_faqs(query):
 
-    thoughts = retrieve_thoughts(query)
+def qa_faqs(query, media):
+
+    thoughts = retrieve_thoughts(query, media)
 
     # tier_1 = thoughts['tier 1']
     tier_2 = thoughts['tier 2']
@@ -310,7 +315,7 @@ def qa_faqs(query):
     # tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])}] title: {x['title']}\n Content: {x.content}", axis = 1).values)
     tier_2 = list(tier_2.apply(lambda x: f"title: {x['title']}\n Content: {x.content}", axis = 1).values)
     # Generate related questions
-    session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges. You will use do different analysis to the articles provided to me. Stay truthful and if you weren't provided any resources give your oppinion only."""
+    session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges. You will do different analysis to the articles provided to me. Stay truthful and if you weren't provided any resources give your oppinion only."""
 
     prompt_q = PromptTemplate(
         input_variables=[ "session_prompt", "articles"],
@@ -331,16 +336,16 @@ def qa_faqs(query):
     questions = questions[questions.index('1'):]
 
     questions = [ t.strip() for (i, t) in enumerate(questions.split('\n\n')) if len(t) > 5][:5]
-
+
     return "\n\n".join(questions)
-# examples = [
-#     ["Will Russia win the war in Ukraine?"]
+examples = [
+    ["Will Russia win the war in Ukraine?"],
 
-#     ]
+    ]
 
-# demo = gr.Interface(fn=qa_retrieve, title="cicero-qa-api",
-#                     inputs=gr.inputs.Textbox(lines=5, label="what would you like to learn about?"),
-#                     outputs="json",examples=examples)
+demo = gr.Interface(fn=qa_retrieve, title="cicero-qa-api",
+                    inputs=gr.inputs.Textbox(lines=5, label="what would you like to learn about?"),
+                    outputs="json",examples=examples)
 
 def parallel_greet_claude(batch, ):
 
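The FAQ parsing kept as context above leans on `questions.index('1')`, which raises `ValueError` when the reply contains no '1', and on `'\n\n'` splits that assume a specific layout. A more tolerant regex sketch, hypothetical and not part of the commit:

```python
import re

# Pull "1. question" / "2) question" style lines regardless of spacing.
def parse_questions(reply: str, limit: int = 5):
    hits = re.findall(r"^\s*\d+[.)]\s*(.+)$", reply, flags=re.MULTILINE)
    return [q.strip() for q in hits if len(q.strip()) > 5][:limit]

print(parse_questions("1. What changed?\n2) Why two indexes?"))
```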
@@ -356,19 +361,19 @@ def parallel_greet_claude(batch, ):
     # ["Covid Global Impact and what's beyond that"]
     # ]
 
-cynthesis = gr.Interface(fn = qa_retrieve, inputs = "text", outputs = gr.components.Textbox(lines=3, label="Cynthesis"))
-questions = gr.Interface(fn = qa_faqs, inputs = "text", outputs = gr.components.Textbox(lines=3, label="Related Questions"))
-themes = gr.Interface(fn = qa_themes, inputs = "text", outputs = gr.components.Textbox(lines=3, label="themes"))
+cynthesis = gr.Interface(fn = qa_retrieve, inputs = ["text", gr.CheckboxGroup(["Articles", "Podcasts", "Youtube"], label="Media", info="Choose One Type of Media until we merge (Podcasts excluded for now)"),], outputs = gr.components.Textbox(lines=3, label="Cynthesis"))
+questions = gr.Interface(fn = qa_faqs, inputs = ["text", gr.CheckboxGroup(["Articles", "Podcasts", "Youtube"], label="Media", info="Choose One Type of Media until we merge (Podcasts excluded for now)"),], outputs = gr.components.Textbox(lines=3, label="Related Questions"))
+#themes = gr.Interface(fn = qa_themes, inputs = ["text", gr.CheckboxGroup(["Articles", "Podcasts", "Youtube"], label="Media", info="Choose One Type of Media until we merge (Podcasts excluded for now)"),], outputs = gr.components.Textbox(lines=3, label="themes"))
 
 # gpt_3 = gr.Interface(fn = parallel_greet_gpt_3, inputs = "text", outputs = gr.components.Textbox(lines=3, label="GPT3.5"))
 # gpt_4 = gr.Interface(fn = parallel_greet_gpt_4, inputs = "text", outputs = gr.components.Textbox(lines=3, label="GPT4"))
 # claude = gr.Interface(fn = parallel_greet_claude, inputs = "text", outputs = gr.components.Textbox(lines=3, label="Claude"))
-reference = gr.Interface(fn = get_references, inputs = "text", outputs = "json", label = "Reference")
-
-demo = gr.Parallel(cynthesis, themes, questions, reference)
-
-# demo = gr.Series(references, cynthesis, themes)
+reference = gr.Interface(fn = get_references, inputs = ["text", gr.CheckboxGroup(["Articles", "Podcasts", "Youtube"], label="Media", info="Choose One Type of Media until we merge (Podcasts excluded for now)"),], outputs = "json", label = "Reference")
+json = gr.Interface(fn = grab_jsons, inputs = ["text", gr.CheckboxGroup(["Articles", "Podcasts", "Youtube"], label="Media", info="Choose One Type of Media until we merge (Podcasts excluded for now)"),], outputs = gr.components.Textbox(lines=3, label="json"))
+#demo = gr.Parallel(cynthesis, reference)
+#demo = gr.Parallel(themes, reference)
 
+demo = gr.Parallel(json, cynthesis, questions, reference)
 demo.queue(concurrency_count = 4)
 demo.launch()
 
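Each sub-interface now repeats the same `gr.CheckboxGroup`, and `gr.Parallel` renders one shared input form that fans out to every function. A stripped-down sketch of that wiring under the Gradio 3.x API this file uses (`gr.inputs.Textbox`, `queue(concurrency_count=...)`); the `f` and `g` functions are placeholders standing in for `qa_retrieve` and `qa_faqs`:

```python
import gradio as gr

def f(text, media):
    return f"f saw {media}: {text}"

def g(text, media):
    return f"g saw {media}: {text}"

choices = ["Articles", "Podcasts", "Youtube"]
a = gr.Interface(fn=f, inputs=["text", gr.CheckboxGroup(choices, label="Media")], outputs="text")
b = gr.Interface(fn=g, inputs=["text", gr.CheckboxGroup(choices, label="Media")], outputs="text")

# One shared input form; both functions run on the same (text, media) pair.
demo = gr.Parallel(a, b)
demo.queue(concurrency_count=4)
demo.launch()
```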