atrytone committed on
Commit
2a7382c
•
1 Parent(s): b73c05b

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +8 -5
  2. app.py +154 -0
  3. requirements.txt +7 -0
README.md CHANGED
@@ -1,12 +1,15 @@
1
  ---
2
- title: MEDLINE Reviewer RecSys
3
- emoji: 🐒
4
- colorFrom: purple
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.35.2
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: NBDT Reviewer Recommendation System
3
+ emoji: 📊
4
+ colorFrom: indigo
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.35.2
8
  app_file: app.py
9
  pinned: false
10
+ models: [biodatlab/MIReAD-Neuro-Large]
11
  ---
12
 
13
+ This space is a demo for a Reviewer Recommendation System for the Neurons, Behavior, Data Analysis and Theory Journal.
14
+ The index being used here includes papers from a variety of authors who have published in the NBDT Journal across various years.
15
+ The embedding model in use here is [biodatlab/MIReAD-Neuro-Large](https://huggingface.co/biodatlab/MIReAD-Neuro-Large).
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from langchain.vectorstores import FAISS
3
+ from langchain.embeddings import HuggingFaceEmbeddings
4
+ import torch
5
+
6
+
7
def create_miread_embed(sents, bundle, device='cpu'):
    """Embed sentences with a MIReAD-style model and return the first-token features.

    Args:
        sents: Text (or list of texts) handed to the tokenizer as-is.
        bundle: Indexable pair where ``bundle[0]`` is the tokenizer and
            ``bundle[1]`` is the model. The model must expose a ``bert``
            sub-module whose output has a ``last_hidden_state`` attribute.
        device: Device on which the forward pass runs. Defaults to ``'cpu'``,
            which matches the previous hard-coded behaviour.

    Returns:
        The hidden state at sequence position 0 for every input, moved to the
        CPU, i.e. ``last_hidden_state[:, 0, :]``.
    """
    tokenizer = bundle[0]
    model = bundle[1]

    target = torch.device(device)
    # Model and inputs must live on the same device for the forward pass.
    model.to(target)

    tokens = tokenizer(
        sents,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt",
    ).to(target)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        out = model.bert(**tokens)

    # Keep only the representation at position 0 of each sequence and always
    # hand the result back on the CPU, whatever device computed it.
    return out.last_hidden_state[:, 0, :].cpu()
23
+
24
+
25
def get_matches(query, k):
    """Return the ``k`` scored matches for ``query`` from the module-level ``vecdb`` index."""
    return vecdb.similarity_search_with_score(query, k=k)
28
+
29
+
30
def inference(query, k=30):
    """Find the papers closest to ``query`` and build the three result tables.

    Args:
        query: Abstract (optionally ``title[SEP]abstract``) to search with.
        k: Number of matches to request from ``get_matches``.

    Returns:
        A list ``[a_output, j_output, n_output]`` of ``gr.Dataframe`` updates
        for the abstracts, journals and authors tables, in that order.
    """
    matches = get_matches(query, k)
    j_bucket = {}
    n_table = []
    a_table = []

    # Each match is indexed as (document, raw_score); raw scores are rounded
    # once here and reused below.
    raw_scores = [round(match[1].item(), 3) for match in matches]
    # `default` keeps an empty result set from raising ValueError.
    min_score = min(raw_scores, default=0.0)
    max_score = max(raw_scores, default=0.0)

    def normaliser(x):
        # Maps the lowest raw score to 1.0 and larger raw scores to smaller
        # values. NOTE(review): presumably raw scores are distances (lower is
        # better) - confirm against the index configuration.
        if max_score == 0:
            # Avoid dividing by zero when the largest raw score is 0.
            return 1.0
        return round(1 - (x - min_score) / max_score, 3)

    for rank, (match, raw_score) in enumerate(zip(matches, raw_scores), start=1):
        meta = match[0].metadata
        score = normaliser(raw_score)
        title = meta['title']

        # Tolerate a missing or empty author list instead of raising.
        authors = meta.get('authors') or []
        author = authors[0].title() if authors else 'None'

        date = meta.get('date', 'None')
        link = meta.get('link', 'None')
        submitter = meta.get('submitter', 'None')

        # Missing, None and blank journals all collapse to the 'None' bucket.
        journal = (meta.get('journal') or '').strip() or 'None'

        # For journals: accumulate the normalised score per journal.
        j_bucket[journal] = j_bucket.get(journal, 0) + score

        # For authors
        n_table.append([rank, score, author, title, link, date])

        # For abstracts
        a_table.append(
            [rank, title, author, submitter, journal, date, link, score]
        )

    # Drop the placeholder bucket; it is absent when every match has a
    # journal, so a plain `del` would raise KeyError.
    j_bucket.pop('None', None)

    ranked_journals = sorted(
        ([name, round(total, 3)] for name, total in j_bucket.items()),
        key=lambda row: row[1],
        reverse=True,
    )
    j_table = [
        [pos, name, total]
        for pos, (name, total) in enumerate(ranked_journals, start=1)
    ]

    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)

    return [a_output, j_output, n_output]
90
+
91
+
92
# Embedding model used to encode queries for the FAISS index.
model_name = "biodatlab/MIReAD-Neuro-Large"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)

faiss_embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Index loaded from the local "nbdt_index" path; queried by get_matches().
vecdb = FAISS.load_local("nbdt_index", faiss_embedder)
102
+
103
+
104
# Gradio UI: one abstract text box, a slider for the number of matches, and
# three result tabs that are filled in by `inference`.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    # Adjacent string literals are used instead of backslash continuations so
    # that source indentation does not leak into the rendered text.
    gr.Markdown(
        "NBDT Recommendation Engine for Editors is a tool for neuroscience "
        "authors/abstracts/journals recommendation built for NBDT journal editors. "
        "It aims to help an editor to find similar reviewers, abstracts, and "
        "journals to a given submitted abstract. "
        "To find a recommendation, paste a `title[SEP]abstract` or `abstract` "
        "in the text box below and click \"Find Matches\". "
        "Then, you can switch to the authors/abstracts/journals tab to find a "
        "suggested list. "
        "The data in our current demo includes authors associated with the "
        "NBDT Journal. We will update the data monthly for up-to-date publications."
    )

    abst = gr.Textbox(label="Abstract", lines=10)

    k = gr.Slider(1, 100, step=1, value=50,
                  label="Number of matches to consider")

    action_btn = gr.Button(value="Find Matches")

    # The result tables start hidden; `inference` returns updates that make
    # them visible.
    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Corresponding Author',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            wrap=True,
            visible=False
        )

    # The output order must match the list returned by `inference`:
    # abstracts, journals, authors.
    action_btn.click(fn=inference,
                     inputs=[
                         abst,
                         k,
                     ],
                     outputs=[a_output, j_output, n_output],
                     api_name="neurojane")

demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ sentence-transformers
2
+ torch
3
+ datasets
4
+ sentencepiece
5
+ langchain
6
+ faiss-cpu
7
+ accelerate