Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
fix empty data
Browse files
app.py
CHANGED
@@ -16,9 +16,9 @@ from background import process_arxiv_ids
|
|
16 |
from apscheduler.schedulers.background import BackgroundScheduler
|
17 |
|
18 |
gemini_api_key, hf_token, dataset_repo_id, request_arxiv_repo_id, restart_repo_id = get_secrets()
|
19 |
-
|
20 |
|
21 |
-
titles, date_dict, requested_arxiv_ids_df, arxivid2data = initialize_data(dataset_repo_id, request_arxiv_repo_id
|
22 |
|
23 |
from ui import (
|
24 |
get_paper_by_year, get_paper_by_month, get_paper_by_day,
|
|
|
16 |
from apscheduler.schedulers.background import BackgroundScheduler
|
17 |
|
18 |
gemini_api_key, hf_token, dataset_repo_id, request_arxiv_repo_id, restart_repo_id = get_secrets()
|
19 |
+
initialize_repos(dataset_repo_id, request_arxiv_repo_id, hf_token)
|
20 |
|
21 |
+
titles, date_dict, requested_arxiv_ids_df, arxivid2data = initialize_data(dataset_repo_id, request_arxiv_repo_id)
|
22 |
|
23 |
from ui import (
|
24 |
get_paper_by_year, get_paper_by_month, get_paper_by_day,
|
init.py
CHANGED
@@ -66,7 +66,7 @@ def _initialize_paper_info(source_ds):
|
|
66 |
else:
|
67 |
return [], {}, {}
|
68 |
|
69 |
-
def initialize_data(source_data_repo_id, request_data_repo_id
|
70 |
global date_dict, arxivid2data
|
71 |
global requested_arxiv_ids_df
|
72 |
|
|
|
66 |
else:
|
67 |
return [], {}, {}
|
68 |
|
69 |
+
def initialize_data(source_data_repo_id, request_data_repo_id):
|
70 |
global date_dict, arxivid2data
|
71 |
global requested_arxiv_ids_df
|
72 |
|
ui.py
CHANGED
@@ -166,9 +166,10 @@ def _filter_duplicate_arxiv_ids(arxiv_ids_to_be_added):
|
|
166 |
arxiv_ids = d['Requested arXiv IDs']
|
167 |
unique_arxiv_ids = set(list(unique_arxiv_ids) + arxiv_ids)
|
168 |
|
169 |
-
|
170 |
-
|
171 |
-
|
|
|
172 |
|
173 |
return list(set(arxiv_ids_to_be_added) - unique_arxiv_ids)
|
174 |
|
|
|
166 |
arxiv_ids = d['Requested arXiv IDs']
|
167 |
unique_arxiv_ids = set(list(unique_arxiv_ids) + arxiv_ids)
|
168 |
|
169 |
+
if len(ds2) > 1:
|
170 |
+
for d in ds2['train']:
|
171 |
+
arxiv_id = d['arxiv_id']
|
172 |
+
unique_arxiv_ids.add(arxiv_id)
|
173 |
|
174 |
return list(set(arxiv_ids_to_be_added) - unique_arxiv_ids)
|
175 |
|