mehradans92 committed
Commit 75c3b48 · 1 Parent(s): 138757f

added unittest
app.py CHANGED
@@ -1,7 +1,7 @@
 import streamlit as st #Web App
 import os
 from PIL import Image
-from unitls import *
+from utils import *
 
 import pickle
 docs = None
test/__init__.py ADDED
File without changes
test/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (151 Bytes)
test/__pycache__/test.cpython-38.pyc ADDED
Binary file (1.33 kB)
test/test.py ADDED
@@ -0,0 +1,17 @@
+import unittest
+import sys
+sys.path.append('../')
+from utils import *
+import os
+import shutil
+
+
+class Utils(unittest.TestCase):
+    def test_download_pdf(self):
+        pdf_info = [('Serverless Applications Why When and How ', 'http://arxiv.org/pdf/2009.08173v2', ['Simon Eismann', 'Joel Scheuner', 'Erwin van Eyk', 'Maximilian Schwinger', 'Johannes Grohmann', 'Cristina L. Abad', 'Alexandru Iosup'], ['cs.SE', 'cs.DC', 'D.2.11; D.2.0; D.2.1'], 'docs', 'Simon Eismann, Joel Scheuner, Erwin van Eyk, Maximilian Schwinger, Johannes Grohmann, Cristina L. Abad, Alexandru Iosup, Serverless Applications Why When and How . arXiv [cs.SE] (2020), (available at http://arxiv.org/pdf/2009.08173v2).')]
+        download_pdf(pdf_info)
+        downloaded_dir = 'docs/Serverless Applications Why When and How .pdf'
+        self.assertTrue(os.path.exists(downloaded_dir))
+        shutil.rmtree('docs/')
+if __name__ == '__main__':
+    unittest.main()
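Note that test.py appends '../' to sys.path before importing utils, so the suite expects to be invoked from inside the test/ directory. A minimal sketch of one way to run it (this runner is an assumption, not part of the commit):

    # Hypothetical runner, not part of this commit: executes test/test.py with
    # the working directory set to test/, so the sys.path.append('../') hack
    # in the test still resolves the top-level utils module.
    import subprocess

    subprocess.run(['python', 'test.py'], cwd='test', check=True)

Because test_download_pdf calls download_pdf against a live arXiv URL, the test needs network access; it cleans up after itself by removing the docs/ folder it creates.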
unitls.py → utils.py RENAMED
@@ -8,7 +8,7 @@ import os
 import shutil
 import time
 
-def call_arXiv_API(search_query, search_by='all', sort_by='relevance', max_results='10', folder_name='arxiv-dl'):
+def call_arXiv_API(search_query, search_by='all', sort_by='relevance', max_results='10', folder_name='docs'):
     '''
     Scrapes arXiv's HTML to get data from each entry in a search. Entries have the following formatting:
     <entry>\n
@@ -75,11 +75,11 @@ def download_pdf(pdf_info):
     # if len(os.listdir(f'./{folder_name}') ) != 0:
     # check folder is empty to avoid using papers from old runs:
     # os.remove(f'./{folder_name}/*')
+    # print(pdf_info)
     all_reference_text = []
     for i,p in enumerate(stqdm(pdf_info, desc='Searching and downloading papers')):
-
-        pdf_title=p[0]
-        pdf_url=p[1]
+        pdf_title=p[0].replace(':','').replace('/','').replace('.','')
+        pdf_url=p[1] + '.full.pdf'
         pdf_author=p[2]
         pdf_category=p[3]
         folder_name=p[4]
@@ -91,8 +91,8 @@ def download_pdf(pdf_info):
         else:
             shutil.rmtree(f'{folder_name}')
             os.makedirs(f"{folder_name}")
-        with open(f'{folder_name}/{pdf_title}.pdf', 'wb') as currP:
-            currP.write(r.content)
+        with open(f'{folder_name}/{pdf_title}.pdf', 'wb') as f:
+            f.write(r.content)
         if i == 0:
             st.markdown("###### Papers found:")
         st.markdown(f"{i+1}. {pdf_citation}")