mehradans92 committed
Commit 75c3b48 · 1 Parent(s): 138757f

added unittest
app.py CHANGED
@@ -1,7 +1,7 @@
 import streamlit as st #Web App
 import os
 from PIL import Image
-from unitls import *
+from utils import *
 
 import pickle
 docs = None
test/__init__.py ADDED
File without changes
test/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (151 Bytes)
test/__pycache__/test.cpython-38.pyc ADDED
Binary file (1.33 kB)
test/test.py ADDED
@@ -0,0 +1,17 @@
+import unittest
+import sys
+sys.path.append('../')
+from utils import *
+import os
+import shutil
+
+
+class Utils(unittest.TestCase):
+    def test_download_pdf(self):
+        pdf_info = [('Serverless Applications Why When and How ', 'http://arxiv.org/pdf/2009.08173v2', ['Simon Eismann', 'Joel Scheuner', 'Erwin van Eyk', 'Maximilian Schwinger', 'Johannes Grohmann', 'Cristina L. Abad', 'Alexandru Iosup'], ['cs.SE', 'cs.DC', 'D.2.11; D.2.0; D.2.1'], 'docs', 'Simon Eismann, Joel Scheuner, Erwin van Eyk, Maximilian Schwinger, Johannes Grohmann, Cristina L. Abad, Alexandru Iosup, Serverless Applications Why When and How . arXiv [cs.SE] (2020), (available at http://arxiv.org/pdf/2009.08173v2).')]
+        download_pdf(pdf_info)
+        downloaded_dir = 'docs/Serverless Applications Why When and How .pdf'
+        self.assertTrue(os.path.exists(downloaded_dir))
+        shutil.rmtree('docs/')
+if __name__ == '__main__':
+    unittest.main()
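Note that test.py appends '../' to sys.path before importing utils, so the suite expects to be invoked from inside the test/ directory. A minimal sketch of one way to run it (this runner is an assumption, not part of the commit):

    # Hypothetical runner, not part of this commit: executes test/test.py with
    # the working directory set to test/, so the sys.path.append('../') hack
    # in the test still resolves the top-level utils module.
    import subprocess

    subprocess.run(['python', 'test.py'], cwd='test', check=True)

Because test_download_pdf calls download_pdf against a live arXiv URL, the test needs network access; it cleans up after itself by removing the docs/ folder it creates.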
unitls.py → utils.py RENAMED
@@ -8,7 +8,7 @@ import os
 import shutil
 import time
 
-def call_arXiv_API(search_query, search_by='all', sort_by='relevance', max_results='10', folder_name='arxiv-dl'):
+def call_arXiv_API(search_query, search_by='all', sort_by='relevance', max_results='10', folder_name='docs'):
     '''
     Scrapes arXiv's HTML to get data from each entry in a search. Entries have the following formatting:
     <entry>\n
@@ -75,11 +75,11 @@ def download_pdf(pdf_info):
     # if len(os.listdir(f'./{folder_name}') ) != 0:
     # check folder is empty to avoid using papers from old runs:
     # os.remove(f'./{folder_name}/*')
+    # print(pdf_info)
     all_reference_text = []
     for i,p in enumerate(stqdm(pdf_info, desc='Searching and downloading papers')):
-
-        pdf_title=p[0]
-        pdf_url=p[1]
+        pdf_title=p[0].replace(':','').replace('/','').replace('.','')
+        pdf_url=p[1] + '.full.pdf'
         pdf_author=p[2]
         pdf_category=p[3]
         folder_name=p[4]
@@ -91,8 +91,8 @@ def download_pdf(pdf_info):
         else:
             shutil.rmtree(f'{folder_name}')
             os.makedirs(f"{folder_name}")
-        with open(f'{folder_name}/{pdf_title}.pdf', 'wb') as currP:
-            currP.write(r.content)
+        with open(f'{folder_name}/{pdf_title}.pdf', 'wb') as f:
+            f.write(r.content)
         if i == 0:
             st.markdown("###### Papers found:")
         st.markdown(f"{i+1}. {pdf_citation}")