jskim committed on
Commit
300debd
1 Parent(s): 81ca652

Made score precision consistent. Added a separation line between results. Made phrase-matching colors lighter for better readability.

Browse files
Files changed (2) hide show
  1. app.py +41 -29
  2. score.py +4 -3
app.py CHANGED
@@ -26,7 +26,8 @@ sent_model.to(device)
26
  def get_similar_paper(
27
  abstract_text_input,
28
  author_id_input,
29
- results={} # variable will be updated and returned
 
30
  ):
31
  num_papers_show = 10 # number of top papers to show from the reviewer
32
  print('retrieving similar papers...')
@@ -34,10 +35,12 @@ def get_similar_paper(
34
  input_sentences = sent_tokenize(abstract_text_input)
35
 
36
  # Get author papers from id
 
37
  name, papers = get_text_from_author_id(author_id_input)
38
 
39
  # Compute Doc-level affinity scores for the Papers
40
- print('computing document scores...')
 
41
  # TODO detect duplicate papers?
42
  titles, abstracts, paper_urls, doc_scores = compute_document_score(
43
  doc_model,
@@ -64,12 +67,12 @@ def get_similar_paper(
64
  end = time.time()
65
  print('paper retrieval complete in [%0.2f] seconds'%(end - start))
66
 
 
67
  print('obtaining highlights..')
68
  start = time.time()
69
  input_sentences = sent_tokenize(abstract_text_input)
70
  num_sents = len(input_sentences)
71
 
72
- summary_info = dict() # elements to visualize upfront
73
  for aa, (tt, ab, ds) in enumerate(zip(titles, abstracts, doc_scores)):
74
  # Compute sent-level and phrase-level affinity scores for each papers
75
  sent_ids, sent_scores, info, top_pairs_info = get_highlight_info(
@@ -90,7 +93,7 @@ def get_similar_paper(
90
  results[display_title[aa]] = {
91
  'title': tt,
92
  'abstract': ab,
93
- 'doc_score': ds,
94
  'source_sentences': input_sentences,
95
  'highlight': word_scores,
96
  'top_pairs': top_pairs_info
@@ -112,15 +115,14 @@ def get_similar_paper(
112
  top_num_info_show = 2 # number of sentence pairs from each paper to show upfront
113
  summary_out = []
114
  for i in range(top_papers_show):
115
- # TODO keep score precision consistent
116
  out_tmp = [
117
  gr.update(value=titles[i], visible=True),
118
- gr.update(value=round(doc_scores[i],3), visible=True) # document affinity
119
  ]
120
  tp = results[display_title[i]]['top_pairs']
121
  for j in range(top_num_info_show):
122
  out_tmp += [
123
- gr.update(value=round(tp[j]['score'],3), visible=True), # sentence relevance
124
  tp[j]['query']['original'],
125
  tp[j]['query'],
126
  tp[j]['candidate']['original'],
@@ -131,6 +133,8 @@ def get_similar_paper(
131
  # add updates to the show more button
132
  out = out + summary_out + [gr.update(visible=True)] # make show more button visible
133
  assert(len(out) == (top_num_info_show * 5 + 2) * top_papers_show + 3)
 
 
134
 
135
  # add the search results to pass on to the Gradio State varaible
136
  out += [results]
@@ -194,7 +198,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
194
  ##### Relevant Parts from Top Papers
195
  - You will be shown three most relevant papers from the reviewer with high **affinity scores** (ranging from 0 to 1) computed using text representations from a [language model](https://github.com/allenai/specter/tree/master/specter).
196
  - For each of the paper, we present relevant pieces of information from the submission and the paper: two pairs of (sentence relevance score, sentence from the submission abstract, sentnece from the paper abstract)
197
- - **<span style="color:black;background-color:#5296D5;">Blue highlights</span>** inidicate phrases that are included in both sentences.
198
  ##### More Relevant Parts
199
  - If the information above is not enough, click `See more relevant parts from other papers` button.
200
  - You will see a list top 10 similar papers along with the affinity scores for each.
@@ -203,7 +207,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
203
  - On the left, you will see individual sentences from the submission abstract to select from.
204
  - On the right, you will see the abstract of the selected paper, with **highlights** incidating relevant parts to the selected sentence.
205
  - **<span style="color:black;background-color:#DB7262;">Red highlights</span>**: sentences with high semantic similarity to the selected sentence.
206
- - **<span style="color:black;background-color:#5296D5;">Blue highlights</span>**: phrases included in the selected sentence.
207
  - To see relevant parts in a different paper from the reviewer, select the new paper.
208
  -------
209
  """
@@ -220,21 +224,25 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
220
  name = gr.Textbox(label='Confirm Reviewer Name', interactive=False)
221
  author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
222
  with gr.Row():
223
- compute_btn = gr.Button('What Makes This a Good Match?') # TODO indicate the progress when pressed
 
 
 
 
224
 
225
  ### OVERVIEW
226
  # Paper title, score, and top-ranking sentence pairs -- two sentence pairs per paper, three papers
227
  ## ONE BLOCK OF INFO FOR A SINGLE PAPER
228
  ## PAPER1
229
- # TODO link to the paper
230
  with gr.Row():
231
  with gr.Column(scale=3):
232
  paper_title1 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
233
  with gr.Column(scale=1):
234
- affinity1 = gr.Number(label='Affinity', interactive=False, value=0, visible=False)
235
  with gr.Row() as rel1_1:
236
  with gr.Column(scale=1):
237
- sent_pair_score1_1 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
238
  with gr.Column(scale=4):
239
  sent_pair_source1_1 = gr.Textbox(label='Sentence from Submission', visible=False)
240
  sent_pair_source1_1_hl = gr.components.Interpretation(sent_pair_source1_1)
@@ -243,27 +251,28 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
243
  sent_pair_candidate1_1_hl = gr.components.Interpretation(sent_pair_candidate1_1)
244
  with gr.Row() as rel1_2:
245
  with gr.Column(scale=1):
246
- sent_pair_score1_2 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
247
  with gr.Column(scale=4):
248
  sent_pair_source1_2 = gr.Textbox(label='Sentence from Submission', visible=False)
249
  sent_pair_source1_2_hl = gr.components.Interpretation(sent_pair_source1_2)
250
  with gr.Column(scale=4):
251
  sent_pair_candidate1_2 = gr.Textbox(label='Sentence from Paper', visible=False)
252
  sent_pair_candidate1_2_hl = gr.components.Interpretation(sent_pair_candidate1_2)
253
- # TODO demarcate the entries
254
- gr.Markdown(
255
- """---"""
256
- )
 
257
 
258
  ## PAPER 2
259
  with gr.Row():
260
  with gr.Column(scale=3):
261
  paper_title2 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
262
  with gr.Column(scale=1):
263
- affinity2 = gr.Number(label='Affinity', interactive=False, value=0., visible=False)
264
  with gr.Row() as rel2_1:
265
  with gr.Column(scale=1):
266
- sent_pair_score2_1 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
267
  with gr.Column(scale=4):
268
  sent_pair_source2_1 = gr.Textbox(label='Sentence from Submission', visible=False)
269
  sent_pair_source2_1_hl = gr.components.Interpretation(sent_pair_source2_1)
@@ -272,7 +281,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
272
  sent_pair_candidate2_1_hl = gr.components.Interpretation(sent_pair_candidate2_1)
273
  with gr.Row() as rel2_2:
274
  with gr.Column(scale=1):
275
- sent_pair_score2_2 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
276
  with gr.Column(scale=4):
277
  sent_pair_source2_2 = gr.Textbox(label='Sentence from Submission', visible=False)
278
  sent_pair_source2_2_hl = gr.components.Interpretation(sent_pair_source2_2)
@@ -280,19 +289,20 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
280
  sent_pair_candidate2_2 = gr.Textbox(label='Sentence from Paper', visible=False)
281
  sent_pair_candidate2_2_hl = gr.components.Interpretation(sent_pair_candidate2_2)
282
 
283
- gr.Markdown(
284
- """---"""
285
- )
 
286
 
287
  ## PAPER 3
288
  with gr.Row():
289
  with gr.Column(scale=3):
290
  paper_title3 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
291
  with gr.Column(scale=1):
292
- affinity3 = gr.Number(label='Affinity', interactive=False, value=0, visible=False)
293
  with gr.Row() as rel3_1:
294
  with gr.Column(scale=1):
295
- sent_pair_score3_1 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
296
  with gr.Column(scale=4):
297
  sent_pair_source3_1 = gr.Textbox(label='Sentence from Submission', visible=False)
298
  sent_pair_source3_1_hl = gr.components.Interpretation(sent_pair_source3_1)
@@ -301,7 +311,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
301
  sent_pair_candidate3_1_hl = gr.components.Interpretation(sent_pair_candidate3_1)
302
  with gr.Row() as rel3_2:
303
  with gr.Column(scale=1):
304
- sent_pair_score3_2 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
305
  with gr.Column(scale=4):
306
  sent_pair_source3_2 = gr.Textbox(label='Sentence from Submission', visible=False)
307
  sent_pair_source3_2_hl = gr.components.Interpretation(sent_pair_source3_2)
@@ -328,7 +338,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
328
  with gr.Column(scale=3):
329
  paper_title = gr.Textbox(label='Title', interactive=False)
330
  with gr.Column(scale=1):
331
- affinity= gr.Number(label='Affinity', interactive=False, value=0)
332
  with gr.Row():
333
  paper_abstract = gr.Textbox(label='Abstract', interactive=False, visible=False)
334
 
@@ -393,7 +403,9 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
393
  sent_pair_candidate3_2,
394
  sent_pair_candidate3_2_hl,
395
  see_more_rel_btn,
396
- info
 
 
397
  ]
398
  )
399
 
 
26
  def get_similar_paper(
27
  abstract_text_input,
28
  author_id_input,
29
+ results={}, # this state variable will be updated and returned
30
+ # progress=gr.Progress(track_tqdm=True)
31
  ):
32
  num_papers_show = 10 # number of top papers to show from the reviewer
33
  print('retrieving similar papers...')
 
35
  input_sentences = sent_tokenize(abstract_text_input)
36
 
37
  # Get author papers from id
38
+ #progress(0.1, desc="Retrieving reviewer papers ...")
39
  name, papers = get_text_from_author_id(author_id_input)
40
 
41
  # Compute Doc-level affinity scores for the Papers
42
+ # print('computing document scores...')
43
+ #progress(0.5, desc="Computing document scores...")
44
  # TODO detect duplicate papers?
45
  titles, abstracts, paper_urls, doc_scores = compute_document_score(
46
  doc_model,
 
67
  end = time.time()
68
  print('paper retrieval complete in [%0.2f] seconds'%(end - start))
69
 
70
+ #progress(0.4, desc="Obtaining relevant information from the papers...")
71
  print('obtaining highlights..')
72
  start = time.time()
73
  input_sentences = sent_tokenize(abstract_text_input)
74
  num_sents = len(input_sentences)
75
 
 
76
  for aa, (tt, ab, ds) in enumerate(zip(titles, abstracts, doc_scores)):
77
  # Compute sent-level and phrase-level affinity scores for each papers
78
  sent_ids, sent_scores, info, top_pairs_info = get_highlight_info(
 
93
  results[display_title[aa]] = {
94
  'title': tt,
95
  'abstract': ab,
96
+ 'doc_score': '%0.3f'%ds,
97
  'source_sentences': input_sentences,
98
  'highlight': word_scores,
99
  'top_pairs': top_pairs_info
 
115
  top_num_info_show = 2 # number of sentence pairs from each paper to show upfront
116
  summary_out = []
117
  for i in range(top_papers_show):
 
118
  out_tmp = [
119
  gr.update(value=titles[i], visible=True),
120
+ gr.update(value='%0.3f'%doc_scores[i], visible=True) # document affinity
121
  ]
122
  tp = results[display_title[i]]['top_pairs']
123
  for j in range(top_num_info_show):
124
  out_tmp += [
125
+ gr.update(value='%0.3f'%tp[j]['score'], visible=True), # sentence relevance
126
  tp[j]['query']['original'],
127
  tp[j]['query'],
128
  tp[j]['candidate']['original'],
 
133
  # add updates to the show more button
134
  out = out + summary_out + [gr.update(visible=True)] # make show more button visible
135
  assert(len(out) == (top_num_info_show * 5 + 2) * top_papers_show + 3)
136
+
137
+ out += [gr.update(visible=True), gr.update(visible=True)] # demarcation line between results
138
 
139
  # add the search results to pass on to the Gradio State varaible
140
  out += [results]
 
198
  ##### Relevant Parts from Top Papers
199
  - You will be shown three most relevant papers from the reviewer with high **affinity scores** (ranging from 0 to 1) computed using text representations from a [language model](https://github.com/allenai/specter/tree/master/specter).
200
  - For each of the paper, we present relevant pieces of information from the submission and the paper: two pairs of (sentence relevance score, sentence from the submission abstract, sentnece from the paper abstract)
201
+ - **<span style="color:black;background-color:#65B5E3;">Blue highlights</span>** inidicate phrases that are included in both sentences.
202
  ##### More Relevant Parts
203
  - If the information above is not enough, click `See more relevant parts from other papers` button.
204
  - You will see a list top 10 similar papers along with the affinity scores for each.
 
207
  - On the left, you will see individual sentences from the submission abstract to select from.
208
  - On the right, you will see the abstract of the selected paper, with **highlights** incidating relevant parts to the selected sentence.
209
  - **<span style="color:black;background-color:#DB7262;">Red highlights</span>**: sentences with high semantic similarity to the selected sentence.
210
+ - **<span style="color:black;background-color:#65B5E3;">Blue highlights</span>**: phrases included in the selected sentence.
211
  - To see relevant parts in a different paper from the reviewer, select the new paper.
212
  -------
213
  """
 
224
  name = gr.Textbox(label='Confirm Reviewer Name', interactive=False)
225
  author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
226
  with gr.Row():
227
+ compute_btn = gr.Button('What Makes This a Good Match?')
228
+
229
+ # TODO indicate the progress when pressed
230
+ with gr.Row():
231
+ search_status = gr.Textbox(label='Search Status', interactive=False, visible=True)
232
 
233
  ### OVERVIEW
234
  # Paper title, score, and top-ranking sentence pairs -- two sentence pairs per paper, three papers
235
  ## ONE BLOCK OF INFO FOR A SINGLE PAPER
236
  ## PAPER1
237
+ # TODO add link to each paper
238
  with gr.Row():
239
  with gr.Column(scale=3):
240
  paper_title1 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
241
  with gr.Column(scale=1):
242
+ affinity1 = gr.Textbox(label='Affinity', interactive=False, value='', visible=False)
243
  with gr.Row() as rel1_1:
244
  with gr.Column(scale=1):
245
+ sent_pair_score1_1 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
246
  with gr.Column(scale=4):
247
  sent_pair_source1_1 = gr.Textbox(label='Sentence from Submission', visible=False)
248
  sent_pair_source1_1_hl = gr.components.Interpretation(sent_pair_source1_1)
 
251
  sent_pair_candidate1_1_hl = gr.components.Interpretation(sent_pair_candidate1_1)
252
  with gr.Row() as rel1_2:
253
  with gr.Column(scale=1):
254
+ sent_pair_score1_2 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
255
  with gr.Column(scale=4):
256
  sent_pair_source1_2 = gr.Textbox(label='Sentence from Submission', visible=False)
257
  sent_pair_source1_2_hl = gr.components.Interpretation(sent_pair_source1_2)
258
  with gr.Column(scale=4):
259
  sent_pair_candidate1_2 = gr.Textbox(label='Sentence from Paper', visible=False)
260
  sent_pair_candidate1_2_hl = gr.components.Interpretation(sent_pair_candidate1_2)
261
+
262
+ with gr.Row(visible=False) as demarc1:
263
+ gr.Markdown(
264
+ """---"""
265
+ )
266
 
267
  ## PAPER 2
268
  with gr.Row():
269
  with gr.Column(scale=3):
270
  paper_title2 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
271
  with gr.Column(scale=1):
272
+ affinity2 = gr.Textbox(label='Affinity', interactive=False, value='', visible=False)
273
  with gr.Row() as rel2_1:
274
  with gr.Column(scale=1):
275
+ sent_pair_score2_1 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
276
  with gr.Column(scale=4):
277
  sent_pair_source2_1 = gr.Textbox(label='Sentence from Submission', visible=False)
278
  sent_pair_source2_1_hl = gr.components.Interpretation(sent_pair_source2_1)
 
281
  sent_pair_candidate2_1_hl = gr.components.Interpretation(sent_pair_candidate2_1)
282
  with gr.Row() as rel2_2:
283
  with gr.Column(scale=1):
284
+ sent_pair_score2_2 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
285
  with gr.Column(scale=4):
286
  sent_pair_source2_2 = gr.Textbox(label='Sentence from Submission', visible=False)
287
  sent_pair_source2_2_hl = gr.components.Interpretation(sent_pair_source2_2)
 
289
  sent_pair_candidate2_2 = gr.Textbox(label='Sentence from Paper', visible=False)
290
  sent_pair_candidate2_2_hl = gr.components.Interpretation(sent_pair_candidate2_2)
291
 
292
+ with gr.Row(visible=False) as demarc2:
293
+ gr.Markdown(
294
+ """---"""
295
+ )
296
 
297
  ## PAPER 3
298
  with gr.Row():
299
  with gr.Column(scale=3):
300
  paper_title3 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
301
  with gr.Column(scale=1):
302
+ affinity3 = gr.Textbox(label='Affinity', interactive=False, value='', visible=False)
303
  with gr.Row() as rel3_1:
304
  with gr.Column(scale=1):
305
+ sent_pair_score3_1 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
306
  with gr.Column(scale=4):
307
  sent_pair_source3_1 = gr.Textbox(label='Sentence from Submission', visible=False)
308
  sent_pair_source3_1_hl = gr.components.Interpretation(sent_pair_source3_1)
 
311
  sent_pair_candidate3_1_hl = gr.components.Interpretation(sent_pair_candidate3_1)
312
  with gr.Row() as rel3_2:
313
  with gr.Column(scale=1):
314
+ sent_pair_score3_2 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
315
  with gr.Column(scale=4):
316
  sent_pair_source3_2 = gr.Textbox(label='Sentence from Submission', visible=False)
317
  sent_pair_source3_2_hl = gr.components.Interpretation(sent_pair_source3_2)
 
338
  with gr.Column(scale=3):
339
  paper_title = gr.Textbox(label='Title', interactive=False)
340
  with gr.Column(scale=1):
341
+ affinity= gr.Textbox(label='Affinity', interactive=False, value='')
342
  with gr.Row():
343
  paper_abstract = gr.Textbox(label='Abstract', interactive=False, visible=False)
344
 
 
403
  sent_pair_candidate3_2,
404
  sent_pair_candidate3_2_hl,
405
  see_more_rel_btn,
406
+ demarc1,
407
+ demarc2,
408
+ info,
409
  ]
410
  )
411
 
score.py CHANGED
@@ -112,7 +112,7 @@ def mark_words(query_sents, words, all_words, sent_start_id, sent_ids, sent_scor
112
  get_match_phrase(query_words, all_words[sent_start_id[sid]:])
113
 
114
  # update selected phrase scores (-1 meaning a different color in gradio)
115
- word_scores[is_selected_sent+is_selected_phrase==2] = -1
116
 
117
  output[i] = {
118
  'is_selected_sent': is_selected_sent,
@@ -154,8 +154,9 @@ def get_highlight_info(model, text1, text2, K=None):
154
  q_words = word_tokenize(q_sent)
155
  c_words = word_tokenize(c_sent)
156
  mask1, mask2 = get_match_phrase(q_words, c_words)
157
- mask1 *= -1 # mark matching phrases as blue
158
- mask2 *= -1
 
159
  assert(len(mask1) == len(q_words) and len(mask2) == len(c_words))
160
  top_pairs_info[count] = {
161
  'query': {
 
112
  get_match_phrase(query_words, all_words[sent_start_id[sid]:])
113
 
114
  # update selected phrase scores (-1 meaning a different color in gradio)
115
+ word_scores[is_selected_sent+is_selected_phrase==2] = -0.5
116
 
117
  output[i] = {
118
  'is_selected_sent': is_selected_sent,
 
154
  q_words = word_tokenize(q_sent)
155
  c_words = word_tokenize(c_sent)
156
  mask1, mask2 = get_match_phrase(q_words, c_words)
157
+ sc = 0.5
158
+ mask1 *= -sc # mark matching phrases as blue (-1: darkest)
159
+ mask2 *= -sc # mark matching phrases as blue
160
  assert(len(mask1) == len(q_words) and len(mask2) == len(c_words))
161
  top_pairs_info[count] = {
162
  'query': {