Spaces:

jskim
/

paper-matching

Runtime error

App Files Files Community

jskim committed on Mar 11, 2023

Commit

300debd

•

1 Parent(s): 81ca652

Made score precision consistent, added a separation line between results, and made phrase-matching colors lighter for better readability.

Browse files

Files changed (2) hide show

app.py +41 -29
score.py +4 -3

app.py CHANGED Viewed

@@ -26,7 +26,8 @@ sent_model.to(device)
 def get_similar_paper(
     abstract_text_input,
     author_id_input,
-    results={} # variable will be updated and returned
 ):
     num_papers_show = 10 # number of top papers to show from the reviewer
     print('retrieving similar papers...')
@@ -34,10 +35,12 @@ def get_similar_paper(
     input_sentences = sent_tokenize(abstract_text_input)
     # Get author papers from id
     name, papers = get_text_from_author_id(author_id_input)
     # Compute Doc-level affinity scores for the Papers
-    print('computing document scores...')
     # TODO detect duplicate papers?
     titles, abstracts, paper_urls, doc_scores = compute_document_score(
         doc_model,
@@ -64,12 +67,12 @@ def get_similar_paper(
     end = time.time()
     print('paper retrieval complete in [%0.2f] seconds'%(end - start))
     print('obtaining highlights..')
     start = time.time()
     input_sentences = sent_tokenize(abstract_text_input)
     num_sents = len(input_sentences)
-    summary_info = dict() # elements to visualize upfront
     for aa, (tt, ab, ds) in enumerate(zip(titles, abstracts, doc_scores)):
         # Compute sent-level and phrase-level affinity scores for each papers
         sent_ids, sent_scores, info, top_pairs_info = get_highlight_info(
@@ -90,7 +93,7 @@ def get_similar_paper(
         results[display_title[aa]] = {
             'title': tt,
             'abstract': ab,
-            'doc_score': ds,
             'source_sentences': input_sentences,
             'highlight': word_scores,
             'top_pairs': top_pairs_info
@@ -112,15 +115,14 @@ def get_similar_paper(
     top_num_info_show = 2 # number of sentence pairs from each paper to show upfront
     summary_out = []
     for i in range(top_papers_show):
-        # TODO keep score precision consistent
         out_tmp = [
             gr.update(value=titles[i], visible=True),
-            gr.update(value=round(doc_scores[i],3), visible=True) # document affinity
         ]
         tp = results[display_title[i]]['top_pairs']
         for j in range(top_num_info_show):
             out_tmp += [
-                gr.update(value=round(tp[j]['score'],3), visible=True), # sentence relevance
                 tp[j]['query']['original'],
                 tp[j]['query'],
                 tp[j]['candidate']['original'],
@@ -131,6 +133,8 @@ def get_similar_paper(
     # add updates to the show more button
     out = out + summary_out + [gr.update(visible=True)] # make show more button visible
     assert(len(out) == (top_num_info_show * 5 + 2) * top_papers_show + 3)
     # add the search results to pass on to the Gradio State variable
     out += [results]
@@ -194,7 +198,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
 ##### Relevant Parts from Top Papers
 - You will be shown three most relevant papers from the reviewer with high **affinity scores** (ranging from 0 to 1) computed using text representations from a [language model](https://github.com/allenai/specter/tree/master/specter).
 - For each of the paper, we present relevant pieces of information from the submission and the paper: two pairs of (sentence relevance score, sentence from the submission abstract, sentnece from the paper abstract)
-- **<span style="color:black;background-color:#5296D5;">Blue highlights</span>** inidicate phrases that are included in both sentences.
 ##### More Relevant Parts
 - If the information above is not enough, click `See more relevant parts from other papers` button.
 - You will see a list top 10 similar papers along with the affinity scores for each.
@@ -203,7 +207,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
 - On the left, you will see individual sentences from the submission abstract to select from.
 - On the right, you will see the abstract of the selected paper, with **highlights** incidating relevant parts to the selected sentence.
 - **<span style="color:black;background-color:#DB7262;">Red highlights</span>**: sentences with high semantic similarity to the selected sentence.
-- **<span style="color:black;background-color:#5296D5;">Blue highlights</span>**: phrases included in the selected sentence.
 - To see relevant parts in a different paper from the reviewer, select the new paper.
 -------
         """
@@ -220,21 +224,25 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
                 name = gr.Textbox(label='Confirm Reviewer Name', interactive=False)
                 author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
     with gr.Row():
-        compute_btn = gr.Button('What Makes This a Good Match?') # TODO indicate the progress when pressed
     ### OVERVIEW
     # Paper title, score, and top-ranking sentence pairs -- two sentence pairs per paper, three papers
     ## ONE BLOCK OF INFO FOR A SINGLE PAPER
     ## PAPER1
-    # TODO link to the paper
     with gr.Row():
         with gr.Column(scale=3):
             paper_title1 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
         with gr.Column(scale=1):
-            affinity1 = gr.Number(label='Affinity', interactive=False, value=0, visible=False)
     with gr.Row() as rel1_1:
         with gr.Column(scale=1):
-            sent_pair_score1_1 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
         with gr.Column(scale=4):
             sent_pair_source1_1 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source1_1_hl = gr.components.Interpretation(sent_pair_source1_1)
@@ -243,27 +251,28 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
             sent_pair_candidate1_1_hl = gr.components.Interpretation(sent_pair_candidate1_1)
     with gr.Row() as rel1_2:
         with gr.Column(scale=1):
-            sent_pair_score1_2 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
         with gr.Column(scale=4):
             sent_pair_source1_2 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source1_2_hl = gr.components.Interpretation(sent_pair_source1_2)
         with gr.Column(scale=4):
             sent_pair_candidate1_2 = gr.Textbox(label='Sentence from Paper', visible=False)
             sent_pair_candidate1_2_hl = gr.components.Interpretation(sent_pair_candidate1_2)
-    # TODO demarcate the entries
-    gr.Markdown(
-        """---"""
-    )
     ## PAPER 2
     with gr.Row():
         with gr.Column(scale=3):
             paper_title2 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
         with gr.Column(scale=1):
-            affinity2 = gr.Number(label='Affinity', interactive=False, value=0., visible=False)
     with gr.Row() as rel2_1:
         with gr.Column(scale=1):
-            sent_pair_score2_1 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
         with gr.Column(scale=4):
             sent_pair_source2_1 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source2_1_hl = gr.components.Interpretation(sent_pair_source2_1)
@@ -272,7 +281,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
             sent_pair_candidate2_1_hl = gr.components.Interpretation(sent_pair_candidate2_1)
     with gr.Row() as rel2_2:
         with gr.Column(scale=1):
-            sent_pair_score2_2 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
         with gr.Column(scale=4):
             sent_pair_source2_2 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source2_2_hl = gr.components.Interpretation(sent_pair_source2_2)
@@ -280,19 +289,20 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
             sent_pair_candidate2_2 = gr.Textbox(label='Sentence from Paper', visible=False)
             sent_pair_candidate2_2_hl = gr.components.Interpretation(sent_pair_candidate2_2)
-    gr.Markdown(
-        """---"""
-    )
     ## PAPER 3
     with gr.Row():
         with gr.Column(scale=3):
             paper_title3 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
         with gr.Column(scale=1):
-            affinity3 = gr.Number(label='Affinity', interactive=False, value=0, visible=False)
     with gr.Row() as rel3_1:
         with gr.Column(scale=1):
-            sent_pair_score3_1 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
         with gr.Column(scale=4):
             sent_pair_source3_1 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source3_1_hl = gr.components.Interpretation(sent_pair_source3_1)
@@ -301,7 +311,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
             sent_pair_candidate3_1_hl = gr.components.Interpretation(sent_pair_candidate3_1)
     with gr.Row() as rel3_2:
         with gr.Column(scale=1):
-            sent_pair_score3_2 = gr.Number(label='Sentence Relevance', interactive=False, value=0, visible=False)
         with gr.Column(scale=4):
             sent_pair_source3_2 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source3_2_hl = gr.components.Interpretation(sent_pair_source3_2)
@@ -328,7 +338,7 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
         with gr.Column(scale=3):
             paper_title = gr.Textbox(label='Title', interactive=False)
         with gr.Column(scale=1):
-            affinity= gr.Number(label='Affinity', interactive=False, value=0)
     with gr.Row():
         paper_abstract = gr.Textbox(label='Abstract', interactive=False, visible=False)
@@ -393,7 +403,9 @@ Below we describe how to use the tool. Also feel free to check out the [video]()
             sent_pair_candidate3_2,
             sent_pair_candidate3_2_hl,
             see_more_rel_btn,
-            info
         ]
     )

 def get_similar_paper(
     abstract_text_input,
     author_id_input,
+    results={}, # this state variable will be updated and returned
+    # progress=gr.Progress(track_tqdm=True)
 ):
     num_papers_show = 10 # number of top papers to show from the reviewer
     print('retrieving similar papers...')
     input_sentences = sent_tokenize(abstract_text_input)
     # Get author papers from id
+    #progress(0.1, desc="Retrieving reviewer papers ...")
     name, papers = get_text_from_author_id(author_id_input)
     # Compute Doc-level affinity scores for the Papers
+    # print('computing document scores...')
+    #progress(0.5, desc="Computing document scores...")
     # TODO detect duplicate papers?
     titles, abstracts, paper_urls, doc_scores = compute_document_score(
         doc_model,
     end = time.time()
     print('paper retrieval complete in [%0.2f] seconds'%(end - start))
+    #progress(0.4, desc="Obtaining relevant information from the papers...")
     print('obtaining highlights..')
     start = time.time()
     input_sentences = sent_tokenize(abstract_text_input)
     num_sents = len(input_sentences)
     for aa, (tt, ab, ds) in enumerate(zip(titles, abstracts, doc_scores)):
         # Compute sent-level and phrase-level affinity scores for each papers
         sent_ids, sent_scores, info, top_pairs_info = get_highlight_info(
         results[display_title[aa]] = {
             'title': tt,
             'abstract': ab,
+            'doc_score': '%0.3f'%ds,
             'source_sentences': input_sentences,
             'highlight': word_scores,
             'top_pairs': top_pairs_info
     top_num_info_show = 2 # number of sentence pairs from each paper to show upfront
     summary_out = []
     for i in range(top_papers_show):
         out_tmp = [
             gr.update(value=titles[i], visible=True),
+            gr.update(value='%0.3f'%doc_scores[i], visible=True) # document affinity
         ]
         tp = results[display_title[i]]['top_pairs']
         for j in range(top_num_info_show):
             out_tmp += [
+                gr.update(value='%0.3f'%tp[j]['score'], visible=True), # sentence relevance
                 tp[j]['query']['original'],
                 tp[j]['query'],
                 tp[j]['candidate']['original'],
     # add updates to the show more button
     out = out + summary_out + [gr.update(visible=True)] # make show more button visible
     assert(len(out) == (top_num_info_show * 5 + 2) * top_papers_show + 3)
+    out += [gr.update(visible=True), gr.update(visible=True)] # demarcation line between results
     # add the search results to pass on to the Gradio State variable
     out += [results]
 ##### Relevant Parts from Top Papers
 - You will be shown three most relevant papers from the reviewer with high **affinity scores** (ranging from 0 to 1) computed using text representations from a [language model](https://github.com/allenai/specter/tree/master/specter).
 - For each of the paper, we present relevant pieces of information from the submission and the paper: two pairs of (sentence relevance score, sentence from the submission abstract, sentnece from the paper abstract)
+- **<span style="color:black;background-color:#65B5E3;">Blue highlights</span>** inidicate phrases that are included in both sentences.
 ##### More Relevant Parts
 - If the information above is not enough, click `See more relevant parts from other papers` button.
 - You will see a list top 10 similar papers along with the affinity scores for each.
 - On the left, you will see individual sentences from the submission abstract to select from.
 - On the right, you will see the abstract of the selected paper, with **highlights** incidating relevant parts to the selected sentence.
 - **<span style="color:black;background-color:#DB7262;">Red highlights</span>**: sentences with high semantic similarity to the selected sentence.
+- **<span style="color:black;background-color:#65B5E3;">Blue highlights</span>**: phrases included in the selected sentence.
 - To see relevant parts in a different paper from the reviewer, select the new paper.
 -------
         """
                 name = gr.Textbox(label='Confirm Reviewer Name', interactive=False)
                 author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
     with gr.Row():
+        compute_btn = gr.Button('What Makes This a Good Match?')
+    # TODO indicate the progress when pressed
+    with gr.Row():
+        search_status = gr.Textbox(label='Search Status', interactive=False, visible=True)
     ### OVERVIEW
     # Paper title, score, and top-ranking sentence pairs -- two sentence pairs per paper, three papers
     ## ONE BLOCK OF INFO FOR A SINGLE PAPER
     ## PAPER1
+    # TODO add link to each paper
     with gr.Row():
         with gr.Column(scale=3):
             paper_title1 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
         with gr.Column(scale=1):
+            affinity1 = gr.Textbox(label='Affinity', interactive=False, value='', visible=False)
     with gr.Row() as rel1_1:
         with gr.Column(scale=1):
+            sent_pair_score1_1 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
         with gr.Column(scale=4):
             sent_pair_source1_1 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source1_1_hl = gr.components.Interpretation(sent_pair_source1_1)
             sent_pair_candidate1_1_hl = gr.components.Interpretation(sent_pair_candidate1_1)
     with gr.Row() as rel1_2:
         with gr.Column(scale=1):
+            sent_pair_score1_2 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
         with gr.Column(scale=4):
             sent_pair_source1_2 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source1_2_hl = gr.components.Interpretation(sent_pair_source1_2)
         with gr.Column(scale=4):
             sent_pair_candidate1_2 = gr.Textbox(label='Sentence from Paper', visible=False)
             sent_pair_candidate1_2_hl = gr.components.Interpretation(sent_pair_candidate1_2)
+    with gr.Row(visible=False) as demarc1:
+        gr.Markdown(
+            """---"""
+        )
     ## PAPER 2
     with gr.Row():
         with gr.Column(scale=3):
             paper_title2 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
         with gr.Column(scale=1):
+            affinity2 = gr.Textbox(label='Affinity', interactive=False, value='', visible=False)
     with gr.Row() as rel2_1:
         with gr.Column(scale=1):
+            sent_pair_score2_1 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
         with gr.Column(scale=4):
             sent_pair_source2_1 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source2_1_hl = gr.components.Interpretation(sent_pair_source2_1)
             sent_pair_candidate2_1_hl = gr.components.Interpretation(sent_pair_candidate2_1)
     with gr.Row() as rel2_2:
         with gr.Column(scale=1):
+            sent_pair_score2_2 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
         with gr.Column(scale=4):
             sent_pair_source2_2 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source2_2_hl = gr.components.Interpretation(sent_pair_source2_2)
             sent_pair_candidate2_2 = gr.Textbox(label='Sentence from Paper', visible=False)
             sent_pair_candidate2_2_hl = gr.components.Interpretation(sent_pair_candidate2_2)
+    with gr.Row(visible=False) as demarc2:
+        gr.Markdown(
+            """---"""
+        )
     ## PAPER 3
     with gr.Row():
         with gr.Column(scale=3):
             paper_title3 = gr.Textbox(label="From the reviewer's paper:", interactive=False, visible=False)
         with gr.Column(scale=1):
+            affinity3 = gr.Textbox(label='Affinity', interactive=False, value='', visible=False)
     with gr.Row() as rel3_1:
         with gr.Column(scale=1):
+            sent_pair_score3_1 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
         with gr.Column(scale=4):
             sent_pair_source3_1 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source3_1_hl = gr.components.Interpretation(sent_pair_source3_1)
             sent_pair_candidate3_1_hl = gr.components.Interpretation(sent_pair_candidate3_1)
     with gr.Row() as rel3_2:
         with gr.Column(scale=1):
+            sent_pair_score3_2 = gr.Textbox(label='Sentence Relevance', interactive=False, value='', visible=False)
         with gr.Column(scale=4):
             sent_pair_source3_2 = gr.Textbox(label='Sentence from Submission', visible=False)
             sent_pair_source3_2_hl = gr.components.Interpretation(sent_pair_source3_2)
         with gr.Column(scale=3):
             paper_title = gr.Textbox(label='Title', interactive=False)
         with gr.Column(scale=1):
+            affinity= gr.Textbox(label='Affinity', interactive=False, value='')
     with gr.Row():
         paper_abstract = gr.Textbox(label='Abstract', interactive=False, visible=False)
             sent_pair_candidate3_2,
             sent_pair_candidate3_2_hl,
             see_more_rel_btn,
+            demarc1,
+            demarc2,
+            info,
         ]
     )

score.py CHANGED Viewed

@@ -112,7 +112,7 @@ def mark_words(query_sents, words, all_words, sent_start_id, sent_ids, sent_scor
                     get_match_phrase(query_words, all_words[sent_start_id[sid]:])
         # update selected phrase scores (-1 meaning a different color in gradio)
-        word_scores[is_selected_sent+is_selected_phrase==2] = -1
         output[i] = {
             'is_selected_sent': is_selected_sent,
@@ -154,8 +154,9 @@ def get_highlight_info(model, text1, text2, K=None):
         q_words = word_tokenize(q_sent)
         c_words = word_tokenize(c_sent)
         mask1, mask2 = get_match_phrase(q_words, c_words)
-        mask1 *= -1 # mark matching phrases as blue
-        mask2 *= -1
         assert(len(mask1) == len(q_words) and len(mask2) == len(c_words))
         top_pairs_info[count] = {
             'query': {

                     get_match_phrase(query_words, all_words[sent_start_id[sid]:])
         # update selected phrase scores (-1 meaning a different color in gradio)
+        word_scores[is_selected_sent+is_selected_phrase==2] = -0.5
         output[i] = {
             'is_selected_sent': is_selected_sent,
         q_words = word_tokenize(q_sent)
         c_words = word_tokenize(c_sent)
         mask1, mask2 = get_match_phrase(q_words, c_words)
+        sc = 0.5
+        mask1 *= -sc # mark matching phrases as blue (-1: darkest)
+        mask2 *= -sc # mark matching phrases as blue
         assert(len(mask1) == len(q_words) and len(mask2) == len(c_words))
         top_pairs_info[count] = {
             'query': {