Spaces:

biodatlab
/

MEDLINE-Reviewer-RecSys

Runtime error

App Files Files Community

atrytone committed on Jul 6, 2023

Commit

2a7382c

•

1 Parent(s): b73c05b

Upload 3 files

Browse files

Files changed (3) hide show

README.md +8 -5
app.py +154 -0
requirements.txt +7 -0

README.md CHANGED Viewed

@@ -1,12 +1,15 @@
 ---
-title: MEDLINE Reviewer RecSys
-emoji: 🐢
-colorFrom: purple
-colorTo: green
 sdk: gradio
 sdk_version: 3.35.2
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: NBDT Reviewer Recommendation System
+emoji: 📊
+colorFrom: indigo
+colorTo: blue
 sdk: gradio
 sdk_version: 3.35.2
 app_file: app.py
 pinned: false
+models: [biodatlab/MIReAD-Neuro]
 ---
+This Space is a demo of a reviewer recommendation system for the journal Neurons, Behavior, Data Analysis, and Theory (NBDT).
+The index being used here includes papers from a variety of authors who have published in the NBDT Journal across various years.
+The embedding model in use here is [biodatlab/MIReAD-Neuro-Large](https://huggingface.co/biodatlab/MIReAD-Neuro-Large).

app.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import gradio as gr
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+import torch
+def create_miread_embed(sents, bundle):
+    """Embed `sents` and return the first-token hidden state of each input.
+
+    Args:
+        sents: Input accepted by the tokenizer (e.g. a list of strings).
+        bundle: Indexable pair whose item 0 is the tokenizer and item 1 is
+            the model; the model must expose a `.bert` encoder.
+
+    Returns:
+        CPU tensor `last_hidden_state[:, 0, :]` produced by `model.bert`.
+    """
+    tokenizer, model = bundle[0], bundle[1]
+    # Inference is done on CPU only, so move the model there first.
+    model.cpu()
+    encoded = tokenizer(
+        sents,
+        max_length=512,
+        padding=True,
+        truncation=True,
+        return_tensors="pt",
+    ).to(torch.device('cpu'))
+    # No gradients are needed for embedding extraction.
+    with torch.no_grad():
+        hidden_states = model.bert(**encoded).last_hidden_state
+        first_token = hidden_states[:, 0, :]
+    return first_token.cpu()
+def get_matches(query, k):
+    """Return the `k` nearest entries to `query` from the module-level `vecdb`.
+
+    The result is passed through unchanged from
+    `vecdb.similarity_search_with_score`; callers in this file read each
+    item as a `(document, score)` pair.
+    """
+    return vecdb.similarity_search_with_score(query, k=k)
+def inference(query, k=30):
+    """Build the abstract, journal and author recommendation tables for a query.
+
+    Retrieves the `k` nearest index entries for `query`, rescales their
+    distances to similarity scores (closest match = 1.0) and arranges the
+    results into the three tables shown in the UI.
+
+    Args:
+        query: Text to search for (the UI passes the submitted abstract).
+        k: Number of matches to retrieve; coerced to `int` before searching.
+
+    Returns:
+        A list `[a_output, j_output, n_output]` of `gr.Dataframe.update`
+        payloads for the abstracts, journals and authors tables, in that
+        order.
+
+    Raises:
+        KeyError: If a matched document has no `title` metadata.
+    """
+    matches = get_matches(query, int(k))
+    if not matches:
+        # Nothing retrieved: show empty tables instead of failing in min()/max().
+        return [gr.Dataframe.update(value=[], visible=True) for _ in range(3)]
+    # float() accepts NumPy scalars as well as plain Python floats.
+    distances = [round(float(match[1]), 3) for match in matches]
+    min_score = min(distances)
+    max_score = max(distances)
+
+    def normaliser(x):
+        # Smaller distance -> larger score; the closest match maps to 1.0.
+        if not max_score:
+            # Every distance is 0 (all exact matches): avoid dividing by zero.
+            return 1.0
+        return round(1 - (x - min_score) / max_score, 3)
+
+    j_bucket = {}
+    n_table = []
+    a_table = []
+    for rank, (match, distance) in enumerate(zip(matches, distances), start=1):
+        meta = match[0].metadata
+        score = normaliser(distance)
+        title = meta['title']
+        # Tolerate records with a missing or empty author list.
+        authors = meta.get('authors') or []
+        author = authors[0].title() if authors else 'None'
+        date = meta.get('date', 'None')
+        link = meta.get('link', 'None')
+        submitter = meta.get('submitter', 'None')
+        # A missing, None or blank journal is displayed as the string 'None'.
+        journal = (meta.get('journal') or '').strip() or 'None'
+        # Journals: accumulate scores per journal, leaving out unknown journals
+        # so that no placeholder entry has to be deleted afterwards.
+        if journal != 'None':
+            j_bucket[journal] = j_bucket.get(journal, 0) + score
+        # Authors table row.
+        n_table.append([rank, score, author, title, link, date])
+        # Abstracts table row.
+        a_table.append([rank, title, author, submitter,
+                        journal, date, link, score])
+    ranked_journals = sorted(
+        ([name, round(total, 3)] for name, total in j_bucket.items()),
+        key=lambda row: row[1],
+        reverse=True,
+    )
+    j_table = [[pos, name, total]
+               for pos, (name, total) in enumerate(ranked_journals, start=1)]
+    j_output = gr.Dataframe.update(value=j_table, visible=True)
+    n_output = gr.Dataframe.update(value=n_table, visible=True)
+    a_output = gr.Dataframe.update(value=a_table, visible=True)
+    return [a_output, j_output, n_output]
+# Embedding model used to encode queries; it runs on CPU and its embeddings
+# are left un-normalised.
+model_name = "biodatlab/MIReAD-Neuro-Large"
+model_kwargs = {'device': 'cpu'}
+encode_kwargs = {'normalize_embeddings': False}
+faiss_embedder = HuggingFaceEmbeddings(
+    model_name=model_name,
+    model_kwargs=model_kwargs,
+    encode_kwargs=encode_kwargs
+)
+# Load the FAISS index from the local "nbdt_index" directory.
+# NOTE(review): presumably built with the same embedding model - confirm.
+vecdb = FAISS.load_local("nbdt_index", faiss_embedder)
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# NBDT Recommendation Engine for Editors")
+    # Adjacent string literals keep source indentation out of the rendered text.
+    gr.Markdown(
+        "NBDT Recommendation Engine for Editors is a tool for neuroscience "
+        "authors/abstracts/journals recommendation built for NBDT journal editors. "
+        "It aims to help an editor to find similar reviewers, abstracts, and "
+        "journals to a given submitted abstract. "
+        "To find a recommendation, paste a `title[SEP]abstract` or `abstract` "
+        "in the text box below and click \"Find Matches\". "
+        "Then, you can switch to the authors/abstracts/journals tabs to find a "
+        "suggested list. "
+        "The data in our current demo includes authors associated with the "
+        "NBDT Journal. We will update the data monthly for up-to-date "
+        "publications."
+    )
+    abst = gr.Textbox(label="Abstract", lines=10)
+    k = gr.Slider(1, 100, step=1, value=50,
+                  label="Number of matches to consider")
+    action_btn = gr.Button(value="Find Matches")
+    # The result tables start hidden; inference() makes them visible.
+    with gr.Tab("Authors"):
+        n_output = gr.Dataframe(
+            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
+            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
+            col_count=(6, "fixed"),
+            wrap=True,
+            visible=False
+        )
+    with gr.Tab("Abstracts"):
+        a_output = gr.Dataframe(
+            headers=['No.', 'Title', 'Author', 'Corresponding Author',
+                     'Journal', 'Date', 'Link', 'Score'],
+            datatype=['number', 'str', 'str', 'str',
+                      'str', 'str', 'str', 'number'],
+            col_count=(8, "fixed"),
+            wrap=True,
+            visible=False
+        )
+    with gr.Tab("Journals"):
+        j_output = gr.Dataframe(
+            headers=['No.', 'Name', 'Score'],
+            datatype=['number', 'str', 'number'],
+            col_count=(3, "fixed"),
+            wrap=True,
+            visible=False
+        )
+    # The output order must match the list returned by inference().
+    action_btn.click(fn=inference,
+                     inputs=[
+                         abst,
+                         k,
+                     ],
+                     outputs=[a_output, j_output, n_output],
+                     api_name="neurojane")
+demo.launch(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+sentence-transformers
+torch
+datasets
+sentencepiece
+langchain
+faiss-cpu
+accelerate