Spaces:

MaloYY
/

ArxivTopicPicker

Sleeping

App Files Files Community

Pavel Malov committed on Apr 16, 2023

Commit

28f6ce1

1 Parent(s): d2af509

Add model

Browse files

Files changed (5) hide show

app.py +15 -12
inference.py +81 -0
requirements.txt +2 -0
resources/model.ckpt +3 -0
resources/tag_mapping.json +172 -0

app.py CHANGED Viewed

@@ -1,24 +1,27 @@
 import streamlit as st
 st.set_page_config(layout="wide")
-st.markdown("""
-<style>
-.big-font {
-    font-size:300px !important;
-}
-</style>
-""", unsafe_allow_html=True)
-st.title("ArxivTitlePicker")
 st.write("This app helps define category of your scientific paper based on its name and abstract.")
 name = st.text_input("Paste here name of your paper")
 abstract = st.text_area("Paste here abstract of your paper")
-if name != '':
-    st.text("Your paper:\nName: " + name + '.\nAbstract: ' + abstract)
 if st.button("Start processing"):
     if name == '':
-        st.write('<p style="font-family:sans-serif; color:Red; font-size: 21px;">Please, provide name of the paper!🙇‍♂️</p>', unsafe_allow_html=True)

 import streamlit as st
+from inference import InferenceModel
 st.set_page_config(layout="wide")
+st.title("ArxivTopicPicker")
 st.write("This app helps define category of your scientific paper based on its name and abstract.")
 name = st.text_input("Paste here name of your paper")
 abstract = st.text_area("Paste here abstract of your paper")
+model = InferenceModel()
+model.inference('load')
+# if name != '':
+#     st.text("Your paper:\n\tName: " + name + '.\n\tAbstract: ' + abstract)
 if st.button("Start processing"):
     if name == '':
+        st.write('<p style="font-family:sans-serif; color:Red; font-size: 21px;">Please, provide name of the paper!🙇‍♂️</p>', unsafe_allow_html=True)
+    else:
+        input_text = name + '. ' + abstract if abstract != '' else name + '.'
+        top_topics = model.inference(input_text)
+        if len(top_topics) == 0:
+            st.text("We don't know yet😰")
+        else:
+            st.text(top_topics)

inference.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import json
+import torch
+from torch import nn
+from typing import List, Dict, Set
+from pathlib import Path
+from transformers import DistilBertTokenizer, DistilBertModel
class Nnet(nn.Module):
    """Two-layer feed-forward classifier: Linear -> ReLU -> BatchNorm1d -> Linear.

    The layers live in a single ``nn.Sequential`` stored as ``self.nnet``, so
    parameter names in the state dict are ``nnet.0.*``, ``nnet.2.*`` and
    ``nnet.3.*``. The layer order and attribute name are kept as they are so
    that state dicts saved for this architecture keep loading.

    Args:
        in_features: Size of each input vector (default 768).
        hidden: Width of the hidden layer (default 256).
        n_classes: Number of output logits (default 85).
    """

    def __init__(self, in_features: int = 768, hidden: int = 256, n_classes: int = 85) -> None:
        super().__init__()
        self.nnet = nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.BatchNorm1d(hidden),
            nn.Linear(hidden, n_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the logits produced by the sequential stack for ``x``."""
        return self.nnet(x)
class ClassificationHead(nn.Module):
    """``Nnet`` classifier initialised from ``resources/model.ckpt``.

    The checkpoint is expected to be a mapping with a ``'state_dict'`` entry
    whose keys match the parameters of ``Nnet``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.nnet = Nnet()
        # map_location="cpu" lets the checkpoint load on a CPU-only host even
        # if its tensors were saved from another device.
        ckpt = torch.load("resources/model.ckpt", map_location="cpu")
        # strict=False tolerates extra entries in the checkpoint, but it also
        # hides *missing* ones, which would leave layers randomly initialised.
        # Surface that case instead of silently serving untrained weights.
        load_result = self.nnet.load_state_dict(ckpt['state_dict'], strict=False)
        if load_result.missing_keys:
            import warnings

            warnings.warn(
                "resources/model.ckpt is missing weights for: "
                + ", ".join(load_result.missing_keys)
            )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Score ``x`` after adding a leading batch dimension of size 1."""
        return self.nnet(x.unsqueeze(0))
class InferenceModel:
    """Suggest topic labels for a text with DistilBERT and a classification head.

    The text is tokenized, embedded with ``distilbert-base-uncased``,
    mean-pooled over tokens, scored by ``ClassificationHead`` and reduced to
    the most probable classes covering more than 95% of the probability mass.
    """

    def __init__(self) -> None:
        self.tokenizer: DistilBertTokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
        self.bert: DistilBertModel = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.head: nn.Module = ClassificationHead()
        tag_to_label = json.loads(Path('resources/tag_mapping.json').read_text(encoding='utf-8'))
        # class index -> human-readable label
        self.mapping: Dict[int, str] = self._label_mapping(tag_to_label)

    @staticmethod
    def _label_mapping(tag_to_label: Dict[str, str]) -> Dict[int, str]:
        """Number the distinct non-empty labels of ``tag_to_label`` from 0.

        NOTE(review): labels used to be numbered in ``set`` iteration order,
        which changes between interpreter runs (string hash randomisation), so
        the index -> label assignment was not reproducible. Alphabetical order
        makes it stable; confirm it matches the class order used in training.
        """
        labels = set(tag_to_label.values())
        labels.discard('')  # tags without a label; tolerate their absence
        return dict(enumerate(sorted(labels)))

    def topp(self, probs: torch.Tensor, p: float = 0.95) -> torch.Tensor:
        """Return the indices of the most probable classes covering more than ``p``.

        Args:
            probs: 1-D tensor of class probabilities.
            p: Cumulative-probability threshold.

        Returns:
            Class indices in descending-probability order, up to and including
            the class whose cumulative probability first exceeds ``p``. All
            indices are returned if the threshold is never exceeded.
        """
        sorted_probs, sorted_inds = torch.sort(probs, descending=True)
        accum = torch.cumsum(sorted_probs, dim=0)
        reached = torch.nonzero(accum > p)
        if reached.numel() == 0:
            return sorted_inds
        # +1 keeps the class that crosses the threshold; without it a single
        # class holding more than ``p`` of the mass would yield no result.
        cutoff = int(reached[0].item())
        return sorted_inds[:cutoff + 1]

    def get_lables(self, classes: torch.Tensor) -> str:
        """Render class indices as their labels, one per line.

        Returns:
            A single string in which every label is followed by a newline;
            the empty string when ``classes`` is empty.
        """
        return "".join(self.mapping[idx] + '\n' for idx in classes.tolist())

    def inference(self, x: str) -> str:
        """Classify the text ``x`` and return its top-p labels, one per line."""
        self.bert.eval()
        self.head.eval()
        with torch.no_grad():
            # tokenize: str -> Tokens
            encoded_input = self.tokenizer(x, return_tensors='pt', truncation=True)
            # get embedding: Tokens -> Embeddings -> MeanEmbedding
            embeddings = self.bert(**encoded_input)
            mean_embedding = embeddings[0].mean(dim=1)[0]
            # get probs: MeanEmbedding -> Probs
            probs = self.head(mean_embedding).softmax(-1)[0]
        # get top_p classes: Probs -> 95% classes
        topp_classes = self.topp(probs)
        # map classes to labels
        return self.get_lables(topp_classes)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ torch==1.13
2	+ transformers

resources/model.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d581cc499259712e58a5cf251c7c2d8054d8d67cad61bde6c0e936ff4e285ca
+size 2643089

resources/tag_mapping.json ADDED Viewed

	@@ -0,0 +1,172 @@

+{
+    "60g15": "Probability",
+    "62-07": "Statistics Theory",
+    "62f15": "Parametric inference",
+    "62g08": "Nonparametric inference",
+    "62h30": "Multivariate analysis",
+    "62m45": "Inference from stochastic processes",
+    "65k10": "Mathematical programming, optimization and variational techniques",
+    "68q32": "Theory of computing",
+    "68t01": "Artificial intelligence",
+    "68t05": "Artificial intelligence",
+    "68t10": "Artificial intelligence",
+    "68t20": "Artificial intelligence",
+    "68t27": "Artificial intelligence",
+    "68t30": "Artificial intelligence",
+    "68t37": "Artificial intelligence",
+    "68t40": "Artificial intelligence",
+    "68t45": "Artificial intelligence",
+    "68t50": "Artificial intelligence",
+    "68txx": "Artificial intelligence",
+    "68u10": "Computing methodologies and applications",
+    "90c25": "Mathematical programming",
+    "90c26": "Mathematical programming",
+    "90c90": "Mathematical programming",
+    "91f20": "Other social and behavioral sciences (mathematical treatment)",
+    "92b20": "Mathematical biology in general",
+    "94a08": "Communication, information",
+    "97r40": "Mathematics education",
+    "astro-ph.im": "Instrumentation and Methods for Astrophysics",
+    "c.1.3": "Distributed, Parallel, and Cluster Computing",
+    "c.2.4": "Distributed, Parallel, and Cluster Computing",
+    "cmp-lg": "Computation and Language",
+    "cond-mat.dis-nn": "Disordered Systems and Neural Networks",
+    "cond-mat.stat-mech": "",
+    "cs.ai": "Artificial intelligence",
+    "cs.ar": "Hardware Architecture",
+    "cs.cc": "Computational Complexity",
+    "cs.ce": "Computational Engineering, Finance, and Science",
+    "cs.cg": "Computational Geometry",
+    "cs.cl": "Computation and Language",
+    "cs.cr": "Cryptography and Security",
+    "cs.cv": "Computer Vision and Pattern Recognition",
+    "cs.cy": "Computers and Society",
+    "cs.db": "Databases",
+    "cs.dc": "Distributed, Parallel, and Cluster Computing",
+    "cs.dl": "Digital Libraries",
+    "cs.dm": "Discrete Mathematics",
+    "cs.ds": "Data Structures and Algorithms",
+    "cs.et": "Emerging Technologies",
+    "cs.fl": "Formal Languages and Automata Theory",
+    "cs.gr": "Graphics",
+    "cs.gt": "Computer Science and Game Theory",
+    "cs.hc": "Human-Computer Interaction",
+    "cs.ir": "Information Retrieval",
+    "cs.it": "Information Theory",
+    "cs.lg": "Machine Learning",
+    "cs.lo": "Logic in Computer Science",
+    "cs.ma": "Multiagent Systems",
+    "cs.mm": "Multimedia",
+    "cs.ms": "Mathematical Software",
+    "cs.na": "Numerical Analysis",
+    "cs.ne": "Neural and Evolutionary Computing",
+    "cs.ni": "Networking and Internet Architecture",
+    "cs.pf": "Performance",
+    "cs.pl": "Programming Languages",
+    "cs.ro": "Robotics",
+    "cs.sc": "Symbolic Computation",
+    "cs.sd": "Sound",
+    "cs.se": "Software Engineering",
+    "cs.si": "Social and Information Networks",
+    "cs.sy": "Systems and Control",
+    "d.1.3": "Distributed, Parallel, and Cluster Computing",
+    "d.1.6": "Programming Languages",
+    "d.2.2": "Software Engineering",
+    "d.3.1": "Programming Languages",
+    "d.3.2": "Programming Languages",
+    "d.3.3": "Programming Languages",
+    "e.2": "Databases",
+    "e.4": "Information Theory",
+    "eess.as": "Sound",
+    "eess.iv": "Computer Vision and Pattern Recognition",
+    "eess.sp": "Signal Processing",
+    "f.1.1": "Formal Languages and Automata Theory",
+    "f.1.3": "Computational Complexity",
+    "f.2": "Data Structures and Algorithms",
+    "f.2.2": "Data Structures and Algorithms",
+    "f.4.1": "Logic in Computer Science",
+    "f.4.2": "Logic in Computer Science",
+    "g.1.2": "Numerical Analysis",
+    "g.1.3": "Numerical Analysis",
+    "g.1.6": "Numerical Analysi",
+    "g.2.2": "Discrete Mathematics",
+    "g.3": "Discrete Mathematics",
+    "h.1.1": "Information Theory",
+    "h.1.2": "Human-Computer Interaction",
+    "h.2.4": "Databases",
+    "h.2.8": "Databases",
+    "h.3.1": "Information Retrieval",
+    "h.3.3": "Information Retrieval",
+    "h.3.4": "Information Retrieval",
+    "h.3.5": "Information Retrieval",
+    "h.5.1": "Sound",
+    "h.5.2": "Sound",
+    "h.5.3": "Sound",
+    "i.2": "Artificial intelligence",
+    "i.2.0": "Artificial intelligence",
+    "i.2.1": "Artificial intelligence",
+    "i.2.10": "Artificial intelligence",
+    "i.2.11": "Artificial intelligence",
+    "i.2.2": "Artificial intelligence",
+    "i.2.3": "Artificial intelligence",
+    "i.2.4": "Artificial intelligence",
+    "i.2.6": "Artificial intelligence",
+    "i.2.7": "Artificial intelligence",
+    "i.2.8": "Artificial intelligence",
+    "i.2.9": "Artificial intelligence",
+    "i.4": "Computer Vision and Pattern Recognition",
+    "i.4.1": "Computer Vision and Pattern Recognition",
+    "i.4.10": "Computer Vision and Pattern Recognition",
+    "i.4.3": "Computer Vision and Pattern Recognition",
+    "i.4.5": "Computer Vision and Pattern Recognition",
+    "i.4.6": "Computer Vision and Pattern Recognition",
+    "i.4.7": "Computer Vision and Pattern Recognition",
+    "i.4.8": "Computer Vision and Pattern Recognition",
+    "i.4.9": "Computer Vision and Pattern Recognition",
+    "i.5": "Computer Vision and Pattern Recognition",
+    "i.5.1": "Computer Vision and Pattern Recognition",
+    "i.5.2": "Computer Vision and Pattern Recognition",
+    "i.5.3": "Computer Vision and Pattern Recognition",
+    "i.5.4": "Computer Vision and Pattern Recognition",
+    "i.5.5": "Computer Vision and Pattern Recognition",
+    "j.2": "Computer Applications",
+    "j.3": "Computer Applications",
+    "j.4": "Computer Applications",
+    "j.5": "Computer Applications",
+    "k.3.2": "Computers and Society",
+    "math.ag": "Algebraic Geometry",
+    "math.co": "Combinatorics",
+    "math.ct": "Category Theory",
+    "math.dg": "Differential Geometry",
+    "math.ds": "Dynamical Systems",
+    "math.fa": "Functional Analysis",
+    "math.it": "Information Theory",
+    "math.lo": "Logic",
+    "math.na": "Numerical Analysis",
+    "math.oc": "Optimization and Control",
+    "math.pr": "Probability",
+    "math.st": "Statistics Theory",
+    "nlin.ao": "Adaptation and Self-Organizing Systems",
+    "nlin.cd": "Chaotic Dynamics",
+    "nlin.cg": "Cellular Automata and Lattice Gases",
+    "physics.ao-ph": "Astrophysics",
+    "physics.bio-ph": "Biological Physics",
+    "physics.chem-ph": "Chemical Physics",
+    "physics.comp-ph": "Computational Physics",
+    "physics.data-an": "Data Analysis, Statistics and Probability",
+    "physics.med-ph": "Medical Physics",
+    "physics.optics": "Optics",
+    "physics.soc-ph": "Physics and Society",
+    "q-bio.bm": "Biomolecules",
+    "q-bio.gn": "Genomics",
+    "q-bio.mn": "Molecular Networks",
+    "q-bio.nc": "Neurons and Cognition",
+    "q-bio.pe": "Populations and Evolution",
+    "q-bio.qm": "Quantitative Methods",
+    "quant-ph": "Quantum Physics",
+    "stat.ap": "Applications",
+    "stat.co": "Computation",
+    "stat.me": "Methodology",
+    "stat.ml": "Machine Learning",
+    "stat.th": "Statistics Theory"
+}