- arxiv_agent.py +119 -63
- utils.py +26 -9
arxiv_agent.py
CHANGED
@@ -4,8 +4,11 @@ import json
 import time
 import datetime
 from xml.etree import ElementTree
-
+from huggingface_hub import CommitScheduler
+from huggingface_hub import HfApi
+from pathlib import Path
 import requests
+from datasets import load_dataset_builder
 import warnings
 warnings.filterwarnings("ignore")
 os.environ['KMP_DUPLICATE_LIB_OK']='True'
@@ -13,6 +16,24 @@ from utils import *
 import thread6
 MAX_DAILY_PAPER = 200
 DAY_TIME = 60 * 60 * 24
+DAY_TIME_MIN = 60 * 24
+DATA_REPO_ID = "cmulgy/ArxivCopilot_data"
+READ_WRITE_TOKEN = os.environ['READ_WRITE']
+api = HfApi(token = READ_WRITE_TOKEN)
+
+DATASET_DIR = Path(".")
+DATASET_DIR.mkdir(parents=True, exist_ok=True)
+from huggingface_hub import hf_hub_download
+
+
+scheduler = CommitScheduler(
+    repo_id=DATA_REPO_ID,
+    repo_type="dataset",
+    folder_path=DATASET_DIR,
+    path_in_repo=".",
+    hf_api = api,
+    every = DAY_TIME_MIN,
+)
 
 def feedback_thought(input_ls): # preload
     agent, query, ansA, ansB, feedbackA, feedbackB = input_ls
@@ -39,8 +60,9 @@ def feedback_thought(input_ls): # preload
         json_data[date][query]["feedbackA"] = feedbackA
         json_data[date][query]["answerB"] = (ansB)
         json_data[date][query]["feedbackB"] = feedbackB
-    with open(filename,"w") as f:
-        json.dump(json_data,f)
+    with scheduler.lock:
+        with open(filename,"w") as f:
+            json.dump(json_data,f)
 
     preferred_ans = ""
     if feedbackA == 1:
@@ -71,12 +93,12 @@ def feedback_thought(input_ls): # preload
         agent.thought_embedding[date] = [get_bert_embedding([tem_thought])[0]]
     else:
         agent.thought_embedding[date].append(get_bert_embedding([tem_thought])[0])
+    with scheduler.lock:
+        with open(filename_thought,"w") as f:
+            json.dump(json_data_thought,f)
 
-    with open(filename_thought,"w") as f:
-        json.dump(json_data_thought,f)
-
-    with open(agent.thought_embedding_path, "wb") as f:
-        pickle.dump(agent.thought_embedding, f)
+        with open(agent.thought_embedding_path, "wb") as f:
+            pickle.dump(agent.thought_embedding, f)
 
     # return "Give feedback successfully!"
 
@@ -96,7 +118,7 @@ def dailyDownload(agent_ls):
 
     json_file = agent.dataset_path
 
-    update_file=update_json_file(json_file, data_collector)
+    update_file=update_json_file(json_file, data_collector, scheduler)
 
     time_chunks_embed={}
 
@@ -105,43 +127,53 @@
             papers = data[date]['abstract']
            papers_embedding=get_bert_embedding(papers)
            time_chunks_embed[date.strftime("%m/%d/%Y")] = papers_embedding
-        update_paper_file=update_pickle_file(agent.embedding_path,time_chunks_embed)
+        update_paper_file=update_pickle_file(agent.embedding_path,time_chunks_embed, scheduler)
         agent.paper = update_file
         agent.paper_embedding = update_paper_file
         print("Today is " + agent.newest_day.strftime("%m/%d/%Y"))
 
 def dailySave(agent_ls):
     agent = agent_ls[0]
+
+
     while True:
         time.sleep(DAY_TIME)
-        with open(agent.trend_idea_path, "w") as f_:
-            json.dump(agent.trend_idea, f_)
-
-        with open(agent.thought_path, "w") as f_:
-            json.dump(agent.thought, f_)
-
-        with open(agent.thought_embedding_path, "wb") as f:
-            pickle.dump(agent.thought_embedding, f)
-
-        with open(agent.profile_path,"w") as f:
-            json.dump(agent.profile,f)
+        with scheduler.lock:
+            with open(agent.trend_idea_path, "w") as f_:
+                json.dump(agent.trend_idea, f_)
+
+            with open(agent.thought_path, "w") as f_:
+                json.dump(agent.thought, f_)
 
+            with open(agent.thought_embedding_path, "wb") as f:
+                pickle.dump(agent.thought_embedding, f)
+
+            with open(agent.profile_path,"w") as f:
+                json.dump(agent.profile,f)
+            with open(agent.comment_path,"w") as f:
+                json.dump(agent.comment,f)
 
 class ArxivAgent:
     def __init__(self):
 
-        self.dataset_path = "
-        self.thought_path = "
-        self.trend_idea_path = "
-        self.profile_path = "
-
-
-        self.
-
-
+        self.dataset_path = DATASET_DIR / "dataset/paper.json"
+        self.thought_path = DATASET_DIR / "dataset/thought.json"
+        self.trend_idea_path = DATASET_DIR / "dataset/trend_idea.json"
+        self.profile_path = DATASET_DIR / "dataset/profile.json"
+        self.comment_path = DATASET_DIR / "dataset/comment.json"
+
+        self.embedding_path = DATASET_DIR / "dataset/paper_embedding.pkl"
+        self.thought_embedding_path = DATASET_DIR / "dataset/thought_embedding.pkl"
+
+        self.feedback_path = DATASET_DIR / "dataset/feedback.json"
         self.today = datetime.datetime.now().strftime("%m/%d/%Y")
 
         self.newest_day = ""
+
+
+        # import pdb
+        # pdb.set_trace()
+
         self.load_cache()
 
         self.download()
@@ -315,15 +347,21 @@ class ArxivAgent:
             data_collector.append(data)
 
         json_file = self.dataset_path
-
+
+
+        try:
+            hf_hub_download(repo_id=DATA_REPO_ID, filename="dataset/paper.json", local_dir = ".", repo_type="dataset")
+        except:
         with open(json_file,'w')as a:
-            print(
+            print(json_file)
 
-        update_file=update_json_file(json_file, data_collector)
+        update_file=update_json_file(json_file, data_collector, scheduler)
 
-
+        try:
+            hf_hub_download(repo_id=DATA_REPO_ID, filename="dataset/paper_embedding.pkl", local_dir = ".", repo_type="dataset")
+        except:
         with open(self.embedding_path,'wb')as a:
-            print(
+            print(self.embedding_path)
         time_chunks_embed={}
 
         for data in data_collector:
@@ -331,75 +369,87 @@ class ArxivAgent:
             papers = data[date]['abstract']
             papers_embedding=get_bert_embedding(papers)
             time_chunks_embed[date.strftime("%m/%d/%Y")] = papers_embedding
-        update_paper_file=update_pickle_file(self.embedding_path,time_chunks_embed)
+        update_paper_file=update_pickle_file(self.embedding_path,time_chunks_embed, scheduler)
         self.paper = update_file
         self.paper_embedding = update_paper_file
 
 
 
     def load_cache(self):
-        filename = self.feedback_path
 
-        if os.path.exists(filename):
+
+        filename = self.feedback_path
+        try:
+            hf_hub_download(repo_id=DATA_REPO_ID, filename="dataset/feedback.json", local_dir = ".", repo_type="dataset")
             with open(filename,"rb") as f:
                 content = f.read()
             if not content:
                 m = {}
             else:
                 m = json.loads(content)
-        else:
+        except:
             with open(filename, mode='w', encoding='utf-8') as ff:
                 m = {}
         self.feedback = m.copy()
 
         filename = self.trend_idea_path
 
-        if os.path.exists(filename):
+        # if os.path.exists(filename):
+        try:
+            hf_hub_download(repo_id=DATA_REPO_ID, filename="dataset/trend_idea.json", local_dir = ".", repo_type="dataset")
             with open(filename,"rb") as f:
                 content = f.read()
             if not content:
                 m = {}
             else:
                 m = json.loads(content)
-        else:
+        except:
             with open(filename, mode='w', encoding='utf-8') as ff:
                 m = {}
         self.trend_idea = m.copy()
 
+
         filename = self.profile_path
-        if os.path.exists(filename):
+        # if os.path.exists(filename):
+        try:
+            hf_hub_download(repo_id=DATA_REPO_ID, filename="dataset/profile.json", local_dir = ".", repo_type="dataset")
             with open(filename,"rb") as f:
                 content = f.read()
             if not content:
                 m = {}
             else:
                 m = json.loads(content)
-        else:
+        except:
             with open(filename, mode='w', encoding='utf-8') as ff:
                 m = {}
         self.profile = m.copy()
 
+
         filename = self.thought_path
         filename_emb = self.thought_embedding_path
-        if os.path.exists(filename):
+        # if os.path.exists(filename):
+        try:
+            hf_hub_download(repo_id=DATA_REPO_ID, filename="dataset/thought.json", local_dir = ".", repo_type="dataset")
             with open(filename,"rb") as f:
                 content = f.read()
             if not content:
                 m = {}
             else:
                 m = json.loads(content)
-        else:
+        except:
             with open(filename, mode='w', encoding='utf-8') as ff:
                 m = {}
 
-        if os.path.exists(filename_emb):
+        # if os.path.exists(filename_emb):
+        try:
+            hf_hub_download(repo_id=DATA_REPO_ID, filename="dataset/thought_embedding.pkl", local_dir = ".", repo_type="dataset")
             with open(filename_emb,"rb") as f:
                 content = f.read()
             if not content:
                 m_emb = {}
             else:
                 m_emb = pickle.loads(content)
-        else:
+        except:
             with open(filename_emb, mode='w', encoding='utf-8') as ff:
                 m_emb = {}
 
@@ -407,6 +457,23 @@ class ArxivAgent:
         self.thought_embedding = m_emb.copy()
 
 
+        filename = self.comment_path
+        # if os.path.exists(filename):
+        try:
+            hf_hub_download(repo_id=DATA_REPO_ID, filename="dataset/comment.json", local_dir = ".", repo_type="dataset")
+
+            with open(filename,"r") as f:
+                content = f.read()
+            if not content:
+                m = {}
+            else:
+                m = json.loads(content)
+        except:
+            with open(filename, mode='w', encoding='utf-8') as ff:
+                m = {}
+
+
+        self.comment = m.copy()
 
 
 
@@ -421,27 +488,16 @@
     def update_comment(self, comment):
         date = datetime.datetime.now().strftime("%m/%d/%Y")
 
-
-        if os.path.exists(filename):
-            with open(filename,"r") as f:
-                content = f.read()
-            if not content:
-                m = {}
-            else:
-                m = json.loads(content)
-        else:
-            with open(filename, mode='w', encoding='utf-8') as ff:
-                m = {}
-
+
 
-        json_data =
+        json_data = self.comment
 
         if date not in json_data:
             json_data[date] = [comment]
         else: json_data[date].append(comment)
-
-        with open(filename,"w") as f:
-            json.dump(json_data,f)
+        # with scheduler.lock:
+        #     with open(filename,"w") as f:
+        #         json.dump(json_data,f)
         return "Thanks for your comment!"
 
 
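The additions above follow the standard huggingface_hub persistence pattern for Spaces: a CommitScheduler periodically commits a local folder to a dataset repo in the background, and every in-process write into that folder is wrapped in scheduler.lock so a scheduled commit never uploads a half-written file. Below is a minimal, self-contained sketch of the same idea; the repo id, token, and file name are placeholders, not the values used by this Space.

import json
from pathlib import Path
from huggingface_hub import CommitScheduler, HfApi

# Placeholders for illustration only.
REPO_ID = "your-username/your-dataset"
DATA_DIR = Path("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

api = HfApi(token="hf_xxx")  # a token with write access to the dataset repo

# Push the contents of DATA_DIR to the dataset repo every 24 * 60 minutes,
# matching the DAY_TIME_MIN interval defined above (CommitScheduler's
# `every` argument is expressed in minutes).
scheduler = CommitScheduler(
    repo_id=REPO_ID,
    repo_type="dataset",
    folder_path=DATA_DIR,
    path_in_repo=".",
    every=24 * 60,
    hf_api=api,
)

def save_record(record: dict) -> None:
    # Hold the scheduler's lock while writing so a background commit
    # never captures a partially written JSON file.
    with scheduler.lock:
        with open(DATA_DIR / "records.json", "w") as f:
            json.dump(record, f)

save_record({"query": "example query", "feedback": 1})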
utils.py
CHANGED
@@ -275,14 +275,14 @@ def summarize_research_field(profile, keywords, dataset,data_embedding):
         content = completion.choices[0].message["content"]
         content_l.append(content)
     return content_l, retrieve_paper
-def update_json_file(filename,data_all):
+def update_json_file(filename,data_all, scheduler):
     with open(filename,"r") as f:
         content = f.read()
     if not content:
         m = {}
     else:
         m = json.loads(content)
-
+
     json_data = m.copy()
 
     # update papers in each keywords
@@ -296,11 +296,12 @@ def update_json_file(filename,data_all):
         papers['ch_abs']=copy.deepcopy(papers['abstract'])
         # print(papers.published)
         json_data[time] = papers
-    with open(filename,"w") as f_:
-        json.dump(json_data,f_)
+    with scheduler.lock:
+        with open(filename,"w") as f_:
+            json.dump(json_data,f_)
     return json_data
 
-def update_pickle_file(filename, data_all):
+def update_pickle_file(filename, data_all, scheduler):
 
     # if os.path.exists(filename):
     #     with open(filename,"rb") as f:
@@ -311,8 +312,23 @@ def update_pickle_file(filename, data_all):
     #         m = {}
     #     else:
     #         m = json.load(content)
-
-
+
+    # if os.path.exists(filename):
+    with open(filename,"rb") as f:
+        content = f.read()
+        if not content:
+            m = {}
+        else:
+            m = pickle.loads(content)
+    # else:
+    #     with open(filename, mode='w', encoding='utf-8') as ff:
+    #         m = {}
+    # if os.path.exists(filename):
+    #     with open(filename, "rb") as file:
+    #         m = pickle.load(file)
+    # else:
+    #     m = {}
+
     # json_data = m.copy()
     # else:
     #     with open(filename, mode='wb', encoding='utf-8') as ff:
@@ -325,8 +341,9 @@ def update_pickle_file(filename, data_all):
     for time in data_all.keys():
         embeddings = data_all[time]
         pickle_data[time] =embeddings
-    with open(filename, "wb") as f:
-        pickle.dump(pickle_data, f)
+    with scheduler.lock:
+        with open(filename, "wb") as f:
+            pickle.dump(pickle_data, f)
 
     return pickle_data
 def json_to_md(filename):
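Both helpers now take the scheduler as an argument and hold scheduler.lock around their final write, so the files that the background commit job uploads are never captured mid-write. The load_cache changes in arxiv_agent.py pair this with hf_hub_download calls that fall back to creating an empty local file when the repo copy is missing. A small sketch of that load-or-initialise pattern, assuming a placeholder repo id rather than the one used by this Space:

import json
from pathlib import Path
from huggingface_hub import hf_hub_download

REPO_ID = "your-username/your-dataset"  # placeholder

def load_json_or_empty(path_in_repo: str) -> dict:
    # Try to pull the file from the dataset repo; on any failure (first run,
    # file not yet in the repo, no network) start from an empty local copy.
    local_path = Path(path_in_repo)
    local_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        hf_hub_download(repo_id=REPO_ID, filename=path_in_repo,
                        local_dir=".", repo_type="dataset")
        content = local_path.read_text()
        return json.loads(content) if content else {}
    except Exception:
        local_path.write_text("")
        return {}

profile = load_json_or_empty("dataset/profile.json")

Catching a broad Exception here mirrors the bare except: used in the diff; a missing file typically surfaces as huggingface_hub's EntryNotFoundError, which could be caught more narrowly.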