kiyer committed on
Commit
f08a02e
·
verified ·
1 Parent(s): 2ddd003

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +18 -11
  2. kw_tags.npz +3 -0
app.py CHANGED
@@ -43,7 +43,7 @@ from openai import OpenAI
43
  # import anthropic
44
  import cohere
45
  import faiss
46
-
47
  import spacy
48
  from string import punctuation
49
  import pytextrank
@@ -282,8 +282,8 @@ class RetrievalSystem():
282
  indices = [i for i in top_results]
283
  df.insert(1,'ADS Link',links,True)
284
  df.insert(2,'Relevance',scores,True)
285
- df.insert(3,'Indices',indices,True)
286
- df = df[['ADS Link','Relevance','date','cites','title','authors','abstract','keywords','ads_id','Indices','embed']]
287
  df.index += 1
288
  return df
289
 
@@ -391,7 +391,7 @@ def Library(query):
391
  papers_df = run_query_ret(st.session_state.query)
392
  op_docs = ''
393
  for i in range(len(papers_df)):
394
- op_docs = op_docs + 'Paper %.0f:' %(i+1) + papers_df['title'][i] + '\n' + papers_df['abstract'][i] + '\n\n'
395
 
396
  return op_docs
397
 
@@ -451,7 +451,7 @@ def run_rag_qa(query, papers_df):
451
  metadata = {"source": row['ads_id']}
452
  doc = Document(page_content=content, metadata=metadata)
453
  documents.append(doc)
454
- my_bar.progress((i+1)/len(papers_df), text='adding documents to LLM context')
455
 
456
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=50, add_start_index=True)
457
 
@@ -562,7 +562,7 @@ def calc_outlier_flag(papers_df, top_k, cutoff_adjust = 0.1):
562
 
563
  def make_embedding_plot(papers_df, consensus_answer):
564
 
565
- plt_indices = np.array(papers_df['Indices'].tolist())
566
 
567
  if 'arxiv_corpus' not in st.session_state:
568
  st.session_state.arxiv_corpus = load_arxiv_corpus()
@@ -574,10 +574,17 @@ def make_embedding_plot(papers_df, consensus_answer):
574
  alphas = np.ones((len(plt_indices),)) * 0.9
575
  alphas[outlier_flag] = 0.5
576
 
577
- fig = plt.figure(figsize=(9,12))
578
  plt.scatter(xax,yax, s=1, alpha=0.01, c='k')
579
- plt.scatter(xax[plt_indices], yax[plt_indices], s=300*alphas**2, alpha=alphas, c='w')
580
- plt.scatter(xax[plt_indices], yax[plt_indices], s=100*alphas**2, alpha=alphas, c='dodgerblue')
 
 
 
 
 
 
 
581
  # plt.scatter(xax[plt_indices][outlier_flag], yax[plt_indices][outlier_flag], s=100, alpha=1., c='firebrick')
582
  plt.axis([0,20,-4.2,18])
583
  plt.axis('off')
@@ -589,7 +596,7 @@ def make_embedding_plot(papers_df, consensus_answer):
589
 
590
  if st.session_state.get('runpfdr'):
591
  with st.spinner(search_text_list[np.random.choice(len(search_text_list))]):
592
- st.write('Settings: [Kw:',toggle_a, 'Time:',toggle_b, 'Cite:',toggle_c, '] top_k:',top_k, 'retrieval:',method)
593
 
594
  papers_df = run_query_ret(st.session_state.query)
595
  st.header(st.session_state.query)
@@ -636,7 +643,7 @@ if st.session_state.get('runpfdr'):
636
 
637
  with st.spinner("Evaluating abstract consensus"):
638
  with st.expander("Abstract consensus", expanded=True):
639
- consensus_answer = evaluate_overall_consensus(query, [papers_df['abstract'][i] for i in range(len(papers_df))])
640
  st.subheader("Consensus: "+consensus_answer.consensus)
641
  st.markdown(consensus_answer.explanation)
642
  st.markdown('Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score)
 
43
  # import anthropic
44
  import cohere
45
  import faiss
46
+ import matplotlib.pyplot as plt
47
  import spacy
48
  from string import punctuation
49
  import pytextrank
 
282
  indices = [i for i in top_results]
283
  df.insert(1,'ADS Link',links,True)
284
  df.insert(2,'Relevance',scores,True)
285
+ df.insert(3,'indices',indices,True)
286
+ df = df[['ADS Link','Relevance','date','cites','title','authors','abstract','keywords','ads_id','indices','embed']]
287
  df.index += 1
288
  return df
289
 
 
391
  papers_df = run_query_ret(st.session_state.query)
392
  op_docs = ''
393
  for i in range(len(papers_df)):
394
+ op_docs = op_docs + 'Paper %.0f:' %(i+1) + papers_df['title'][i+1] + '\n' + papers_df['abstract'][i+1] + '\n\n'
395
 
396
  return op_docs
397
 
 
451
  metadata = {"source": row['ads_id']}
452
  doc = Document(page_content=content, metadata=metadata)
453
  documents.append(doc)
454
+ my_bar.progress((i)/len(papers_df), text='adding documents to LLM context')
455
 
456
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=50, add_start_index=True)
457
 
 
562
 
563
  def make_embedding_plot(papers_df, consensus_answer):
564
 
565
+ plt_indices = np.array(papers_df['indices'].tolist())
566
 
567
  if 'arxiv_corpus' not in st.session_state:
568
  st.session_state.arxiv_corpus = load_arxiv_corpus()
 
574
  alphas = np.ones((len(plt_indices),)) * 0.9
575
  alphas[outlier_flag] = 0.5
576
 
577
+ fig = plt.figure(figsize=(9*2.,12*2.))
578
  plt.scatter(xax,yax, s=1, alpha=0.01, c='k')
579
+
580
+ clkws = np.load('kw_tags.npz')
581
+ all_x, all_y, all_topics, repeat_flag = clkws['all_x'], clkws['all_y'], clkws['all_topics'], clkws['repeat_flag']
582
+ for i in range(len(all_topics)):
583
+ if repeat_flag[i] == False:
584
+ plt.text(all_x[i], all_y[i], all_topics[i],fontsize=9,ha="center", va="center",
585
+ bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.3',alpha=0.81))
586
+ plt.scatter(xax[plt_indices], yax[plt_indices], s=300*alphas**2, alpha=alphas, c='w',zorder=1000)
587
+ plt.scatter(xax[plt_indices], yax[plt_indices], s=100*alphas**2, alpha=alphas, c='dodgerblue',zorder=1001)
588
  # plt.scatter(xax[plt_indices][outlier_flag], yax[plt_indices][outlier_flag], s=100, alpha=1., c='firebrick')
589
  plt.axis([0,20,-4.2,18])
590
  plt.axis('off')
 
596
 
597
  if st.session_state.get('runpfdr'):
598
  with st.spinner(search_text_list[np.random.choice(len(search_text_list))]):
599
+ st.write('Settings: [Kw:',toggle_a, 'Time:',toggle_b, 'Cite:',toggle_c, '] top_k:',top_k, 'retrieval: `',method+'`')
600
 
601
  papers_df = run_query_ret(st.session_state.query)
602
  st.header(st.session_state.query)
 
643
 
644
  with st.spinner("Evaluating abstract consensus"):
645
  with st.expander("Abstract consensus", expanded=True):
646
+ consensus_answer = evaluate_overall_consensus(query, [papers_df['abstract'][i+1] for i in range(len(papers_df))])
647
  st.subheader("Consensus: "+consensus_answer.consensus)
648
  st.markdown(consensus_answer.explanation)
649
  st.markdown('Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score)
kw_tags.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d7068524d3d3029b8d36f4dd2fdf20d5c8a12fc69d8d1a404921aa54a6b40a8
3
+ size 17849