bluebalam committed on
Commit
5cb07ef
•
1 Parent(s): 5b6c150

app upgrade, ignore files, add requirements, and update README

Browse files
Files changed (4) hide show
  1. .gitignore +137 -0
  2. README.md +8 -36
  3. app.py +55 -0
  4. requirements.txt +8 -0
.gitignore ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # pycharm
75
+ .idea
76
+
77
+ # ipynb
78
+ *.ipynb
79
+ # PyBuilder
80
+ target/
81
+
82
+ # Jupyter Notebook
83
+ .ipynb_checkpoints
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100
+ __pypackages__/
101
+
102
+ # Celery stuff
103
+ celerybeat-schedule
104
+ celerybeat.pid
105
+
106
+ # SageMath parsed files
107
+ *.sage.py
108
+
109
+ # Environments
110
+ .env
111
+ .venv
112
+ env/
113
+ venv/
114
+ ENV/
115
+ env.bak/
116
+ venv.bak/
117
+
118
+ # Spyder project settings
119
+ .spyderproject
120
+ .spyproject
121
+
122
+ # Rope project settings
123
+ .ropeproject
124
+
125
+ # mkdocs documentation
126
+ /site
127
+
128
+ # mypy
129
+ .mypy_cache/
130
+ .dmypy.json
131
+ dmypy.json
132
+
133
+ # Pyre type checker
134
+ .pyre/
135
+
136
+ # idea
137
+ .idea
README.md CHANGED
@@ -1,46 +1,18 @@
1
  ---
2
- title: Paper Rec
3
- emoji: 💩
4
- colorFrom: gray
5
  colorTo: blue
6
  sdk: gradio
7
  app_file: app.py
8
- pinned: false
9
  license: mit
10
  ---
11
 
12
- # Configuration
13
 
14
- `title`: _string_
15
- Display title for the Space
16
 
17
- `emoji`: _string_
18
- Space emoji (emoji-only character allowed)
19
 
20
- `colorFrom`: _string_
21
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
22
-
23
- `colorTo`: _string_
24
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
25
-
26
- `sdk`: _string_
27
- Can be either `gradio`, `streamlit`, or `static`
28
-
29
- `sdk_version` : _string_
30
- Only applicable for `streamlit` SDK.
31
- See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
32
-
33
- `app_file`: _string_
34
- Path to your main application file (which contains either `gradio` or `streamlit` Python code, or `static` html code).
35
- Path is relative to the root of the repository.
36
-
37
- `models`: _List[string]_
38
- HF model IDs (like "gpt2" or "deepset/roberta-base-squad2") used in the Space.
39
- Will be parsed automatically from your code if not specified here.
40
-
41
- `datasets`: _List[string]_
42
- HF dataset IDs (like "common_voice" or "oscar-corpus/OSCAR-2109") used in the Space.
43
- Will be parsed automatically from your code if not specified here.
44
-
45
- `pinned`: _boolean_
46
- Whether the Space stays on top of your list.
 
1
  ---
2
+ title: paper-rec
3
+ emoji: 📃 🤖 💙
4
+ colorFrom: indigo
5
  colorTo: blue
6
  sdk: gradio
7
  app_file: app.py
8
+ pinned: true
9
  license: mit
10
  ---
11
 
12
+ # `paper-rec` demo
13
 
14
+ What paper in ML/AI should I read next? It is difficult to choose among all the great research published every day. This demo gives you a personalized selection of papers from the latest scientific contributions available on [arXiv](https://arxiv.org/).
 
15
 
16
+ Just enter the title or abstract (or both) of papers you liked in the past, or keywords for topics of interest, and get the top-10 article recommendations tailored to your taste.
 
17
 
18
+ Enjoy!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+
4
+
5
+ from paper_rec import recommender, etl
6
+ from gradio.inputs import Textbox
7
+
8
+
9
def recommend(txt):
    """Recommend recent papers that match the user's sample text.

    Args:
        txt: Free text describing what the user likes (for example paper
            titles, abstracts, or keywords).

    Returns:
        A list of at most 10 dicts with the keys ``id``, ``title``,
        ``authors`` and ``abstract``, one per distinct paper id, in the order
        produced by the recommender. If ``txt`` is empty or contains only
        whitespace, a dict with a single ``msg`` key is returned instead.
    """
    if not txt.strip():
        return {"msg": "no recommendations available for the input text."}

    top_n = 10
    # model user preferences:
    # NOTE(review): the cleaned text is computed but the sentences are still
    # extracted from the raw input -- confirm which one is intended.
    cleaned_txt = etl.clean_text(txt)
    sentences = etl.get_sentences_from_txt(txt)
    rec = recommender.Recommender()
    # loading data and model from HF
    rec.load_data()
    rec.load_model()
    # compute user embedding
    user_embedding = torch.from_numpy(rec.embedding(sentences))
    # get recommendations based on user preferences; ask for more than top_n
    # so that enough candidates remain after dropping repeated paper ids
    recs = rec.recommend(user_embedding, top_k=100)
    # deduplicate
    recs_output = []
    seen_paper = set()
    for p in recs:
        if p["id"] not in seen_paper:
            recs_output.append({
                "id": p["id"],
                "title": p["title"],
                # Each field gets its own key; a repeated "abstract" key would
                # silently overwrite the authors value.
                "authors": p["authors"],
                "abstract": p["abstract"],
            })
            seen_paper.add(p["id"])
        if len(recs_output) >= top_n:
            break

    # report top-n
    return recs_output
41
+
42
+
43
def inputs():
    """Placeholder that takes no arguments and returns ``None``."""
    return None
45
+
46
+
47
# Heading and blurb for the demo page.
title = "Interactive demo: paper-rec"
description = "Demo that recommends you what recent papers in AI/ML to read next based on what you like."

# Single multi-line text box in, JSON out; `recommend` does the actual work.
iface = gr.Interface(
    fn=recommend,
    inputs=[
        Textbox(
            lines=10,
            placeholder="Titles and abstracts from papers you like",
            default="",
            label="Sample of what I like <3",
        )
    ],
    outputs="json",
    layout="vertical",
    # Hand the heading and blurb to the interface so they appear in the UI.
    title=title,
    description=description,
)
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ sentence_transformers
3
+ huggingface-hub
4
+ feedparser
5
+ beautifulsoup4
6
+ lxml
7
+ git+https://github.com/bluebalam/paper-rec.git
8
+ gradio