Spaces:
Runtime error
Runtime error
pdf input removed. now retrieving urls for reviewer papers.
Browse files- app.py +27 -26
- input_format.py +1 -1
- score.py +5 -2
app.py
CHANGED
@@ -25,7 +25,6 @@ sent_model.to(device)
|
|
25 |
|
26 |
def get_similar_paper(
|
27 |
abstract_text_input,
|
28 |
-
pdf_file_input,
|
29 |
author_id_input,
|
30 |
results={} # variable will be updated and returned
|
31 |
):
|
@@ -34,19 +33,13 @@ def get_similar_paper(
|
|
34 |
start = time.time()
|
35 |
input_sentences = sent_tokenize(abstract_text_input)
|
36 |
|
37 |
-
#
|
38 |
-
|
39 |
-
name = None
|
40 |
-
papers = []
|
41 |
-
raise ValueError('Use submission abstract instead.')
|
42 |
-
else:
|
43 |
-
# Get author papers from id
|
44 |
-
name, papers = get_text_from_author_id(author_id_input)
|
45 |
|
46 |
# Compute Doc-level affinity scores for the Papers
|
47 |
print('computing document scores...')
|
48 |
# TODO detect duplicate papers?
|
49 |
-
titles, abstracts, doc_scores = compute_document_score(
|
50 |
doc_model,
|
51 |
tokenizer,
|
52 |
abstract_text_input,
|
@@ -57,6 +50,7 @@ def get_similar_paper(
|
|
57 |
results = {
|
58 |
'titles': titles,
|
59 |
'abstracts': abstracts,
|
|
|
60 |
'doc_scores': doc_scores
|
61 |
}
|
62 |
|
@@ -64,6 +58,7 @@ def get_similar_paper(
|
|
64 |
titles = titles[:num_papers_show]
|
65 |
abstracts = abstracts[:num_papers_show]
|
66 |
doc_scores = doc_scores[:num_papers_show]
|
|
|
67 |
|
68 |
display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
|
69 |
end = time.time()
|
@@ -117,14 +112,15 @@ def get_similar_paper(
|
|
117 |
top_num_info_show = 2 # number of sentence pairs from each paper to show upfront
|
118 |
summary_out = []
|
119 |
for i in range(top_papers_show):
|
|
|
120 |
out_tmp = [
|
121 |
gr.update(value=titles[i], visible=True),
|
122 |
-
gr.update(value=doc_scores[i], visible=True)
|
123 |
]
|
124 |
tp = results[display_title[i]]['top_pairs']
|
125 |
for j in range(top_num_info_show):
|
126 |
out_tmp += [
|
127 |
-
gr.update(value=tp[j]['score'], visible=True),
|
128 |
tp[j]['query']['original'],
|
129 |
tp[j]['query'],
|
130 |
tp[j]['candidate']['original'],
|
@@ -133,7 +129,7 @@ def get_similar_paper(
|
|
133 |
summary_out += out_tmp
|
134 |
|
135 |
# add updates to the show more button
|
136 |
-
out = out + summary_out + [gr.update(visible=True)] # show more button
|
137 |
assert(len(out) == (top_num_info_show * 5 + 2) * top_papers_show + 3)
|
138 |
|
139 |
# add the search results to pass on to the Gradio State variable
|
@@ -196,7 +192,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
196 |
- Once the name is confirmed, press the `What Makes This a Good Match?` button.
|
197 |
- Based on the input information, the tool will first search for similar papers from the reviewer's previous publications using [Semantic Scholar API](https://www.semanticscholar.org/product/api).
|
198 |
##### Relevant Parts from Top Papers
|
199 |
-
- You will be shown three most relevant papers from the reviewer with high **affinity scores** (ranging from 0
|
200 |
- For each paper, we present relevant pieces of information from the submission and the paper: two pairs of (sentence relevance score, sentence from the submission abstract, sentence from the paper abstract)
|
201 |
- **<span style="color:black;background-color:#5296D5;">Blue highlights</span>** indicate phrases that are included in both sentences.
|
202 |
##### More Relevant Parts
|
@@ -217,8 +213,6 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
217 |
with gr.Row() as input_row:
|
218 |
with gr.Column():
|
219 |
abstract_text_input = gr.Textbox(label='Submission Abstract')
|
220 |
-
with gr.Column():
|
221 |
-
pdf_file_input = gr.File(label='OR upload a submission PDF File')
|
222 |
with gr.Column():
|
223 |
with gr.Row():
|
224 |
author_id_input = gr.Textbox(label='Reviewer Link or ID (Semantic Scholar)')
|
@@ -226,13 +220,13 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
226 |
name = gr.Textbox(label='Confirm Reviewer Name', interactive=False)
|
227 |
author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
|
228 |
with gr.Row():
|
229 |
-
compute_btn = gr.Button('What Makes This a Good Match?')
|
230 |
|
231 |
### OVERVIEW
|
232 |
# Paper title, score, and top-ranking sentence pairs -- two sentence pairs per paper, three papers
|
233 |
-
# TODO blockfy similar components together and simplify
|
234 |
## ONE BLOCK OF INFO FOR A SINGLE PAPER
|
235 |
## PAPER1
|
|
|
236 |
with gr.Row():
|
237 |
with gr.Column(scale=3):
|
238 |
paper_title1 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
|
@@ -256,13 +250,17 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
256 |
with gr.Column(scale=4):
|
257 |
sent_pair_candidate1_2 = gr.Textbox(label='Sentence from Paper', visible=False)
|
258 |
sent_pair_candidate1_2_hl = gr.components.Interpretation(sent_pair_candidate1_2)
|
|
|
|
|
|
|
|
|
259 |
|
260 |
## PAPER 2
|
261 |
with gr.Row():
|
262 |
with gr.Column(scale=3):
|
263 |
paper_title2 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
|
264 |
with gr.Column(scale=1):
|
265 |
-
affinity2 = gr.Number(label='Affinity', interactive=False, value=0
|
266 |
with gr.Row() as rel2_1:
|
267 |
with gr.Column(scale=1):
|
268 |
sent_pair_score2_1 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
|
@@ -270,7 +268,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
270 |
sent_pair_source2_1 = gr.Textbox(label='Sentence from Submission', visible=False)
|
271 |
sent_pair_source2_1_hl = gr.components.Interpretation(sent_pair_source2_1)
|
272 |
with gr.Column(scale=4):
|
273 |
-
sent_pair_candidate2_1 = gr.Textbox(label='Sentence from
|
274 |
sent_pair_candidate2_1_hl = gr.components.Interpretation(sent_pair_candidate2_1)
|
275 |
with gr.Row() as rel2_2:
|
276 |
with gr.Column(scale=1):
|
@@ -279,9 +277,13 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
279 |
sent_pair_source2_2 = gr.Textbox(label='Sentence from Submission', visible=False)
|
280 |
sent_pair_source2_2_hl = gr.components.Interpretation(sent_pair_source2_2)
|
281 |
with gr.Column(scale=4):
|
282 |
-
sent_pair_candidate2_2 = gr.Textbox(label='Sentence from
|
283 |
sent_pair_candidate2_2_hl = gr.components.Interpretation(sent_pair_candidate2_2)
|
284 |
-
|
|
|
|
|
|
|
|
|
285 |
## PAPER 3
|
286 |
with gr.Row():
|
287 |
with gr.Column(scale=3):
|
@@ -295,7 +297,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
295 |
sent_pair_source3_1 = gr.Textbox(label='Sentence from Submission', visible=False)
|
296 |
sent_pair_source3_1_hl = gr.components.Interpretation(sent_pair_source3_1)
|
297 |
with gr.Column(scale=4):
|
298 |
-
sent_pair_candidate3_1 = gr.Textbox(label='Sentence from
|
299 |
sent_pair_candidate3_1_hl = gr.components.Interpretation(sent_pair_candidate3_1)
|
300 |
with gr.Row() as rel3_2:
|
301 |
with gr.Column(scale=1):
|
@@ -304,7 +306,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
304 |
sent_pair_source3_2 = gr.Textbox(label='Sentence from Submission', visible=False)
|
305 |
sent_pair_source3_2_hl = gr.components.Interpretation(sent_pair_source3_2)
|
306 |
with gr.Column(scale=4):
|
307 |
-
sent_pair_candidate3_2 = gr.Textbox(label='Sentence from
|
308 |
sent_pair_candidate3_2_hl = gr.components.Interpretation(sent_pair_candidate3_2)
|
309 |
|
310 |
## Show more button
|
@@ -348,7 +350,6 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
348 |
fn=get_similar_paper,
|
349 |
inputs=[
|
350 |
abstract_text_input,
|
351 |
-
pdf_file_input,
|
352 |
author_id_input,
|
353 |
info
|
354 |
],
|
@@ -437,7 +438,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
|
|
437 |
gr.Markdown(
|
438 |
"""
|
439 |
---------
|
440 |
-
**Disclaimer.** This tool and its output should not serve as the sole justification for confirming a match for the submission. It is intended as a supplementary tool that the
|
441 |
"""
|
442 |
)
|
443 |
|
|
|
25 |
|
26 |
def get_similar_paper(
|
27 |
abstract_text_input,
|
|
|
28 |
author_id_input,
|
29 |
results={} # variable will be updated and returned
|
30 |
):
|
|
|
33 |
start = time.time()
|
34 |
input_sentences = sent_tokenize(abstract_text_input)
|
35 |
|
36 |
+
# Get author papers from id
|
37 |
+
name, papers = get_text_from_author_id(author_id_input)
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
# Compute Doc-level affinity scores for the Papers
|
40 |
print('computing document scores...')
|
41 |
# TODO detect duplicate papers?
|
42 |
+
titles, abstracts, paper_urls, doc_scores = compute_document_score(
|
43 |
doc_model,
|
44 |
tokenizer,
|
45 |
abstract_text_input,
|
|
|
50 |
results = {
|
51 |
'titles': titles,
|
52 |
'abstracts': abstracts,
|
53 |
+
'urls': paper_urls,
|
54 |
'doc_scores': doc_scores
|
55 |
}
|
56 |
|
|
|
58 |
titles = titles[:num_papers_show]
|
59 |
abstracts = abstracts[:num_papers_show]
|
60 |
doc_scores = doc_scores[:num_papers_show]
|
61 |
+
paper_urls = paper_urls[:num_papers_show]
|
62 |
|
63 |
display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
|
64 |
end = time.time()
|
|
|
112 |
top_num_info_show = 2 # number of sentence pairs from each paper to show upfront
|
113 |
summary_out = []
|
114 |
for i in range(top_papers_show):
|
115 |
+
# TODO keep score precision consistent
|
116 |
out_tmp = [
|
117 |
gr.update(value=titles[i], visible=True),
|
118 |
+
gr.update(value=round(doc_scores[i],3), visible=True) # document affinity
|
119 |
]
|
120 |
tp = results[display_title[i]]['top_pairs']
|
121 |
for j in range(top_num_info_show):
|
122 |
out_tmp += [
|
123 |
+
gr.update(value=round(tp[j]['score'],3), visible=True), # sentence relevance
|
124 |
tp[j]['query']['original'],
|
125 |
tp[j]['query'],
|
126 |
tp[j]['candidate']['original'],
|
|
|
129 |
summary_out += out_tmp
|
130 |
|
131 |
# add updates to the show more button
|
132 |
+
out = out + summary_out + [gr.update(visible=True)] # make show more button visible
|
133 |
assert(len(out) == (top_num_info_show * 5 + 2) * top_papers_show + 3)
|
134 |
|
135 |
# add the search results to pass on to the Gradio State variable
|
|
|
192 |
- Once the name is confirmed, press the `What Makes This a Good Match?` button.
|
193 |
- Based on the input information, the tool will first search for similar papers from the reviewer's previous publications using [Semantic Scholar API](https://www.semanticscholar.org/product/api).
|
194 |
##### Relevant Parts from Top Papers
|
195 |
+
- You will be shown three most relevant papers from the reviewer with high **affinity scores** (ranging from 0 to 1) computed using text representations from a [language model](https://github.com/allenai/specter/tree/master/specter).
|
196 |
- For each paper, we present relevant pieces of information from the submission and the paper: two pairs of (sentence relevance score, sentence from the submission abstract, sentence from the paper abstract)
|
197 |
- **<span style="color:black;background-color:#5296D5;">Blue highlights</span>** indicate phrases that are included in both sentences.
|
198 |
##### More Relevant Parts
|
|
|
213 |
with gr.Row() as input_row:
|
214 |
with gr.Column():
|
215 |
abstract_text_input = gr.Textbox(label='Submission Abstract')
|
|
|
|
|
216 |
with gr.Column():
|
217 |
with gr.Row():
|
218 |
author_id_input = gr.Textbox(label='Reviewer Link or ID (Semantic Scholar)')
|
|
|
220 |
name = gr.Textbox(label='Confirm Reviewer Name', interactive=False)
|
221 |
author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
|
222 |
with gr.Row():
|
223 |
+
compute_btn = gr.Button('What Makes This a Good Match?') # TODO indicate the progress when pressed
|
224 |
|
225 |
### OVERVIEW
|
226 |
# Paper title, score, and top-ranking sentence pairs -- two sentence pairs per paper, three papers
|
|
|
227 |
## ONE BLOCK OF INFO FOR A SINGLE PAPER
|
228 |
## PAPER1
|
229 |
+
# TODO link to the paper
|
230 |
with gr.Row():
|
231 |
with gr.Column(scale=3):
|
232 |
paper_title1 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
|
|
|
250 |
with gr.Column(scale=4):
|
251 |
sent_pair_candidate1_2 = gr.Textbox(label='Sentence from Paper', visible=False)
|
252 |
sent_pair_candidate1_2_hl = gr.components.Interpretation(sent_pair_candidate1_2)
|
253 |
+
# TODO demarcate the entries
|
254 |
+
gr.Markdown(
|
255 |
+
"""---"""
|
256 |
+
)
|
257 |
|
258 |
## PAPER 2
|
259 |
with gr.Row():
|
260 |
with gr.Column(scale=3):
|
261 |
paper_title2 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
|
262 |
with gr.Column(scale=1):
|
263 |
+
affinity2 = gr.Number(label='Affinity', interactive=False, value=0., visible=False)
|
264 |
with gr.Row() as rel2_1:
|
265 |
with gr.Column(scale=1):
|
266 |
sent_pair_score2_1 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
|
|
|
268 |
sent_pair_source2_1 = gr.Textbox(label='Sentence from Submission', visible=False)
|
269 |
sent_pair_source2_1_hl = gr.components.Interpretation(sent_pair_source2_1)
|
270 |
with gr.Column(scale=4):
|
271 |
+
sent_pair_candidate2_1 = gr.Textbox(label='Sentence from Paper', visible=False)
|
272 |
sent_pair_candidate2_1_hl = gr.components.Interpretation(sent_pair_candidate2_1)
|
273 |
with gr.Row() as rel2_2:
|
274 |
with gr.Column(scale=1):
|
|
|
277 |
sent_pair_source2_2 = gr.Textbox(label='Sentence from Submission', visible=False)
|
278 |
sent_pair_source2_2_hl = gr.components.Interpretation(sent_pair_source2_2)
|
279 |
with gr.Column(scale=4):
|
280 |
+
sent_pair_candidate2_2 = gr.Textbox(label='Sentence from Paper', visible=False)
|
281 |
sent_pair_candidate2_2_hl = gr.components.Interpretation(sent_pair_candidate2_2)
|
282 |
+
|
283 |
+
gr.Markdown(
|
284 |
+
"""---"""
|
285 |
+
)
|
286 |
+
|
287 |
## PAPER 3
|
288 |
with gr.Row():
|
289 |
with gr.Column(scale=3):
|
|
|
297 |
sent_pair_source3_1 = gr.Textbox(label='Sentence from Submission', visible=False)
|
298 |
sent_pair_source3_1_hl = gr.components.Interpretation(sent_pair_source3_1)
|
299 |
with gr.Column(scale=4):
|
300 |
+
sent_pair_candidate3_1 = gr.Textbox(label='Sentence from Paper', visible=False)
|
301 |
sent_pair_candidate3_1_hl = gr.components.Interpretation(sent_pair_candidate3_1)
|
302 |
with gr.Row() as rel3_2:
|
303 |
with gr.Column(scale=1):
|
|
|
306 |
sent_pair_source3_2 = gr.Textbox(label='Sentence from Submission', visible=False)
|
307 |
sent_pair_source3_2_hl = gr.components.Interpretation(sent_pair_source3_2)
|
308 |
with gr.Column(scale=4):
|
309 |
+
sent_pair_candidate3_2 = gr.Textbox(label='Sentence from Paper', visible=False)
|
310 |
sent_pair_candidate3_2_hl = gr.components.Interpretation(sent_pair_candidate3_2)
|
311 |
|
312 |
## Show more button
|
|
|
350 |
fn=get_similar_paper,
|
351 |
inputs=[
|
352 |
abstract_text_input,
|
|
|
353 |
author_id_input,
|
354 |
info
|
355 |
],
|
|
|
438 |
gr.Markdown(
|
439 |
"""
|
440 |
---------
|
441 |
+
**Disclaimer.** This tool and its output should not serve as the sole justification for confirming a match for the submission. It is intended as a supplementary tool that the users may use at their discretion; the correctness of the output of the tool is not guaranteed. This may be improved by updating the internal models used to compute the affinity scores and sentence relevance, which may require additional research independently. The tool does not compromise the privacy of the reviewers as it relies only on their publicly-available information (e.g., names and list of previously published papers).
|
442 |
"""
|
443 |
)
|
444 |
|
input_format.py
CHANGED
@@ -73,7 +73,7 @@ def get_text_from_author_id(author_id, max_count=100):
|
|
73 |
if 'http' in aid: # handle semantic scholar url input
|
74 |
aid = aid.split('/')
|
75 |
aid = aid[aid.index('author')+2]
|
76 |
-
url = "https://api.semanticscholar.org/graph/v1/author/%s?fields=url,name,paperCount,papers,papers.title,papers.abstract"%aid
|
77 |
r = requests.get(url)
|
78 |
if r.status_code == 404:
|
79 |
raise ValueError('Author link not found.')
|
|
|
73 |
if 'http' in aid: # handle semantic scholar url input
|
74 |
aid = aid.split('/')
|
75 |
aid = aid[aid.index('author')+2]
|
76 |
+
url = "https://api.semanticscholar.org/graph/v1/author/%s?fields=url,name,paperCount,papers,papers.title,papers.abstract,papers.url"%aid
|
77 |
r = requests.get(url)
|
78 |
if r.status_code == 404:
|
79 |
raise ValueError('Author link not found.')
|
score.py
CHANGED
@@ -141,7 +141,7 @@ def get_highlight_info(model, text1, text2, K=None):
|
|
141 |
top_pairs = []
|
142 |
ii = np.unravel_index(np.argsort(np.array(sent_scores).ravel())[-top_pair_num:], sent_scores.shape)
|
143 |
for i, j in zip(ii[0][::-1], ii[1][::-1]):
|
144 |
-
score = sent_scores[i,j]
|
145 |
index_pair = (i, sent_ids[i,j].item())
|
146 |
top_pairs.append((score, index_pair)) # list of (score, (sent_id_query, sent_id_candidate))
|
147 |
|
@@ -218,10 +218,12 @@ def compute_document_score(doc_model, tokenizer, query, papers, batch=5):
|
|
218 |
scores = []
|
219 |
titles = []
|
220 |
abstracts = []
|
|
|
221 |
for p in papers:
|
222 |
if p['title'] is not None and p['abstract'] is not None:
|
223 |
titles.append(p['title'])
|
224 |
abstracts.append(p['abstract'])
|
|
|
225 |
scores = predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=batch)
|
226 |
assert(len(scores) == len(abstracts))
|
227 |
idx_sorted = np.argsort(scores)[::-1]
|
@@ -229,5 +231,6 @@ def compute_document_score(doc_model, tokenizer, query, papers, batch=5):
|
|
229 |
titles_sorted = [titles[x] for x in idx_sorted]
|
230 |
abstracts_sorted = [abstracts[x] for x in idx_sorted]
|
231 |
scores_sorted = [scores[x] for x in idx_sorted]
|
|
|
232 |
|
233 |
-
return titles_sorted, abstracts_sorted, scores_sorted
|
|
|
141 |
top_pairs = []
|
142 |
ii = np.unravel_index(np.argsort(np.array(sent_scores).ravel())[-top_pair_num:], sent_scores.shape)
|
143 |
for i, j in zip(ii[0][::-1], ii[1][::-1]):
|
144 |
+
score = sent_scores[i,j].item()
|
145 |
index_pair = (i, sent_ids[i,j].item())
|
146 |
top_pairs.append((score, index_pair)) # list of (score, (sent_id_query, sent_id_candidate))
|
147 |
|
|
|
218 |
scores = []
|
219 |
titles = []
|
220 |
abstracts = []
|
221 |
+
urls = []
|
222 |
for p in papers:
|
223 |
if p['title'] is not None and p['abstract'] is not None:
|
224 |
titles.append(p['title'])
|
225 |
abstracts.append(p['abstract'])
|
226 |
+
urls.append(p['url'])
|
227 |
scores = predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=batch)
|
228 |
assert(len(scores) == len(abstracts))
|
229 |
idx_sorted = np.argsort(scores)[::-1]
|
|
|
231 |
titles_sorted = [titles[x] for x in idx_sorted]
|
232 |
abstracts_sorted = [abstracts[x] for x in idx_sorted]
|
233 |
scores_sorted = [scores[x] for x in idx_sorted]
|
234 |
+
urls_sorted = [urls[x] for x in idx_sorted]
|
235 |
|
236 |
+
return titles_sorted, abstracts_sorted, urls_sorted, scores_sorted
|