Commit · 75c3b48
Parent(s): 138757f

added unittest
Files changed:
- app.py +1 -1
- test/__init__.py +0 -0
- test/__pycache__/__init__.cpython-38.pyc +0 -0
- test/__pycache__/test.cpython-38.pyc +0 -0
- test/test.py +17 -0
- unitls.py → utils.py +6 -6
app.py
CHANGED
@@ -1,7 +1,7 @@
 import streamlit as st #Web App
 import os
 from PIL import Image
-from unitls import *
+from utils import *
 
 import pickle
 docs = None
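The only app-side change is the wildcard import, which now resolves against the renamed utils.py. As a sketch, the explicit equivalent is shown below; the assumption that call_arXiv_API and download_pdf make up the module's public surface is based solely on the utils.py hunks further down.

# Explicit equivalent of the wildcard import above (a sketch, assuming the
# public API of utils.py is just the two helpers visible in this commit).
from utils import call_arXiv_API, download_pdf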
test/__init__.py
ADDED
File without changes

test/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (151 Bytes)

test/__pycache__/test.cpython-38.pyc
ADDED
Binary file (1.33 kB)
test/test.py
ADDED
@@ -0,0 +1,17 @@
+import unittest
+import sys
+sys.path.append('../')
+from utils import *
+import os
+import shutil
+
+
+class Utils(unittest.TestCase):
+    def test_download_pdf(self):
+        pdf_info = [('Serverless Applications Why When and How ', 'http://arxiv.org/pdf/2009.08173v2', ['Simon Eismann', 'Joel Scheuner', 'Erwin van Eyk', 'Maximilian Schwinger', 'Johannes Grohmann', 'Cristina L. Abad', 'Alexandru Iosup'], ['cs.SE', 'cs.DC', 'D.2.11; D.2.0; D.2.1'], 'docs', 'Simon Eismann, Joel Scheuner, Erwin van Eyk, Maximilian Schwinger, Johannes Grohmann, Cristina L. Abad, Alexandru Iosup, Serverless Applications Why When and How . arXiv [cs.SE] (2020), (available at http://arxiv.org/pdf/2009.08173v2).')]
+        download_pdf(pdf_info)
+        dowloaded_dir = 'docs/Serverless Applications Why When and How .pdf'
+        assert os.path.exists(dowloaded_dir) == True
+        shutil.rmtree(f'docs/')
+if __name__ == '__main__':
+    unittest.main()
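Because test.py appends '../' to sys.path before importing utils, the test is presumably meant to be run from inside the test/ directory (for example with python test.py, which also hits the unittest.main() entry point). For reference, below is a sketch of the layout of one pdf_info entry: the roles of p[0] through p[4] are inferred from the download_pdf hunks further down, while the final citation slot is an assumption, since the line assigning pdf_citation is not part of the visible diff.

# Sketch of one pdf_info entry as exercised by test_download_pdf; the roles of
# p[0]..p[4] come from the utils.py diff, the last slot is assumed to feed pdf_citation.
entry = (
    'Serverless Applications Why When and How ',  # p[0]: title, sanitized into the file name
    'http://arxiv.org/pdf/2009.08173v2',          # p[1]: URL, '.full.pdf' gets appended
    ['Simon Eismann', 'Joel Scheuner'],           # p[2]: authors (shortened here)
    ['cs.SE', 'cs.DC'],                           # p[3]: arXiv categories
    'docs',                                       # p[4]: download folder
    'Simon Eismann et al. (2020), ...',           # assumed: the citation string rendered via st.markdown
)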
unitls.py → utils.py
RENAMED
@@ -8,7 +8,7 @@ import os
 import shutil
 import time
 
-def call_arXiv_API(search_query, search_by='all', sort_by='relevance', max_results='10', folder_name='
+def call_arXiv_API(search_query, search_by='all', sort_by='relevance', max_results='10', folder_name='docs'):
     '''
     Scraps the arXiv's html to get data from each entry in a search. Entries has the following formatting:
     <entry>\n
@@ -75,11 +75,11 @@ def download_pdf(pdf_info):
     # if len(os.listdir(f'./{folder_name}') ) != 0:
     # check folder is empty to avoid using papers from old runs:
     # os.remove(f'./{folder_name}/*')
+    # print(pdf_info)
     all_reference_text = []
     for i,p in enumerate(stqdm(pdf_info, desc='Searching and downloading papers')):
-
-
-        pdf_url=p[1]
+        pdf_title=p[0].replace(':','').replace('/','').replace('.','')
+        pdf_url=p[1] + '.full.pdf'
         pdf_author=p[2]
         pdf_category=p[3]
         folder_name=p[4]
@@ -91,8 +91,8 @@ def download_pdf(pdf_info):
         else:
             shutil.rmtree(f'{folder_name}')
         os.makedirs(f"{folder_name}")
-        with open(f'{folder_name}/{pdf_title}.pdf', 'wb') as
-
+        with open(f'{folder_name}/{pdf_title}.pdf', 'wb') as f:
+            f.write(r.content)
         if i == 0:
             st.markdown("###### Papers found:")
         st.markdown(f"{i+1}. {pdf_citation}")
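Functionally, the download_pdf changes amount to stripping ':', '/' and '.' from the title before it becomes a file name, and appending '.full.pdf' to the URL before the response body is written to disk. Below is a standalone sketch of that per-paper step; save_paper is a hypothetical helper name, and the requests.get call is an assumption, since the hunks above show f.write(r.content) but never where r is assigned.

# Standalone sketch of the per-paper download step after this commit.
# save_paper is a hypothetical name; r = requests.get(...) is assumed, as the
# diff only shows f.write(r.content).
import os
import requests

def save_paper(p, folder_name='docs'):
    pdf_title = p[0].replace(':', '').replace('/', '').replace('.', '')  # same sanitization as the diff
    pdf_url = p[1] + '.full.pdf'                                         # same URL suffix as the diff
    os.makedirs(folder_name, exist_ok=True)                              # the diff recreates the folder with os.makedirs
    r = requests.get(pdf_url)                                            # assumed download call
    with open(f'{folder_name}/{pdf_title}.pdf', 'wb') as f:
        f.write(r.content)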