cmulgy committed on
Commit
b6c8361
·
1 Parent(s): 7096ea6
Files changed (1) hide show
  1. arxiv_agent.py +84 -65
arxiv_agent.py CHANGED
@@ -110,8 +110,21 @@ def dailyDownload(agent_ls):
110
  agent.paper_embedding = update_paper_file
111
  print("Today is " + agent.newest_day.strftime("%m/%d/%Y"))
112
 
 
 
 
 
 
 
 
 
 
113
 
114
-
 
 
 
 
115
 
116
 
117
  class ArxivAgent:
@@ -119,6 +132,8 @@ class ArxivAgent:
119
 
120
  self.dataset_path = "./dataset/paper.json"
121
  self.thought_path = "./dataset/thought.json"
 
 
122
 
123
  self.embedding_path = "./dataset/paper_embedding.pkl"
124
  self.thought_embedding_path = './dataset/thought_embedding.pkl'
@@ -127,30 +142,24 @@ class ArxivAgent:
127
  self.today = datetime.datetime.now().strftime("%m/%d/%Y")
128
 
129
  self.newest_day = ""
130
- self.load_thought()
131
- self.load_feedback()
132
  self.download()
133
  try:
134
  thread6.run_threaded(dailyDownload, [self])
135
- # thread6.start_new_thread( print_time, ["Thread-2", 4] )
136
  except:
137
  print("Error: unable to start thread")
138
- # self.paper = self.download()
139
- # self.paper_by_date = self.paper
140
  def edit_profile(self, profile, author_name):
141
- profile = profile
142
- filename = 'dataset/profile.json'
143
- with open(filename, 'r', encoding='utf-8') as file:
144
- data = json.load(file)
145
- data[author_name]=profile
146
- with open(filename, "w") as f:
147
- json.dump(data, f)
148
  return "Successfully edit profile!"
149
 
150
  def get_profile(self, author_name):
151
  if author_name == "": return None
152
- # import pdb
153
- # pdb.set_trace()
154
  profile = self.get_arxiv_data_by_author(author_name)
155
  return profile
156
  def select_date(self, method, profile_input):
@@ -186,16 +195,40 @@ class ArxivAgent:
186
  data_chunk_embedding=chunk_embedding_date
187
  profile = profile_input
188
 
189
- # trend, paper_link = summarize_research_field(profile, "Machine Learning", dataset) # trend
190
- trend, paper_link = summarize_research_field(profile, "Machine Learning", dataset,data_chunk_embedding) # trend
 
 
 
 
 
 
 
191
 
192
  # import pdb
193
  # pdb.set_trace()
194
- reference = papertitleAndLink(paper_link)
195
- # print("Trend:", self.trend,"\n")
196
- idea = generate_ideas(trend) # idea
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
- key_update = list(self.paper.keys())[-1]
 
 
 
 
 
199
  if key_update not in self.thought:
200
  self.thought[key_update] = []
201
  if key_update not in self.thought_embedding:
@@ -205,22 +238,11 @@ class ArxivAgent:
205
  self.thought_embedding[key_update].append(get_bert_embedding([trend])[0])
206
  self.thought[key_update].append(idea[0])
207
  self.thought_embedding[key_update].append(get_bert_embedding([idea])[0])
208
- # with open(self.dataset_path, "w") as f_:
209
- # json.dump(self.paper, f_)
210
-
211
- with open(self.thought_path, "w") as f_:
212
- json.dump(self.thought, f_)
213
-
214
- with open(self.thought_embedding_path, "wb") as f:
215
- pickle.dump(self.thought_embedding, f)
216
-
217
 
218
  return trend, reference, idea
219
 
220
  def response(self, data, profile_input):
221
- # dataset = self.paper_by_date
222
 
223
- # dataset = self.paper
224
  query = [data]
225
  profile = profile_input
226
 
@@ -315,7 +337,7 @@ class ArxivAgent:
315
 
316
 
317
 
318
- def load_feedback(self):
319
  filename = self.feedback_path
320
 
321
  if os.path.exists(filename):
@@ -330,13 +352,35 @@ class ArxivAgent:
330
  m = {}
331
  self.feedback = m.copy()
332
 
333
-
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
- def load_thought(self):
337
  filename = self.thought_path
338
  filename_emb = self.thought_embedding_path
339
-
340
  if os.path.exists(filename):
341
  with open(filename,"rb") as f:
342
  content = f.read()
@@ -348,7 +392,6 @@ class ArxivAgent:
348
  with open(filename, mode='w', encoding='utf-8') as ff:
349
  m = {}
350
 
351
-
352
  if os.path.exists(filename_emb):
353
  with open(filename_emb,"rb") as f:
354
  content = f.read()
@@ -366,12 +409,7 @@ class ArxivAgent:
366
 
367
 
368
 
369
- # for date in self.thought.keys():
370
- # papers = data[time]['abstract']
371
- # papers_embedding=get_bert_embedding(papers)
372
- # time_chunks_embed[time.strftime("%m/%d/%Y")] = papers_embedding
373
- # return
374
- # for k in json_data.keys():
375
  def update_feedback_thought(self, query, ansA, ansB, feedbackA, feedbackB):
376
  try:
377
  thread6.run_threaded(feedback_thought, [self, query, ansA, ansB, feedbackA, feedbackB])
@@ -413,22 +451,7 @@ class ArxivAgent:
413
 
414
 
415
 
416
- filename = 'dataset/profile.json'
417
- if os.path.exists(filename):
418
- with open(filename,"r") as f:
419
- content = f.read()
420
- if not content:
421
- m = {}
422
- else:
423
- m = json.loads(content)
424
- else:
425
- with open(filename, mode='w', encoding='utf-8') as ff:
426
- m = {}
427
-
428
-
429
- json_data = m.copy()
430
-
431
- if author_name in json_data: return json_data[author_name]
432
 
433
  author_query = author_name.replace(" ", "+")
434
  url = f"http://export.arxiv.org/api/query?search_query=au:{author_query}&start=0&max_results=300" # Adjust max_results if needed
@@ -512,15 +535,11 @@ class ArxivAgent:
512
  # pdb.set_trace()
513
  personal_info = "; ".join([f"{details['Title & Abstract']}" for details in papers_list])
514
  info = summarize_research_direction(personal_info)
515
- json_data[author_name] = info
516
- with open(filename,"w") as f:
517
- json.dump(json_data,f)
518
- return json_data[author_name]
519
 
520
- # data = {author_name: {"paper_{}".format(i+1): paper for i, paper in enumerate(papers_list)}}
521
 
522
  else:
523
- # print("Failed to fetch data from arXiv.")
524
  return None
525
 
526
 
 
110
  agent.paper_embedding = update_paper_file
111
  print("Today is " + agent.newest_day.strftime("%m/%d/%Y"))
112
 
113
+ def dailySave(agent_ls):
114
+ agent = agent_ls[0]
115
+ while True:
116
+ time.sleep(DAY_TIME)
117
+ with open(agent.trend_idea_path, "w") as f_:
118
+ json.dump(agent.trend_idea, f_)
119
+
120
+ with open(agent.thought_path, "w") as f_:
121
+ json.dump(agent.thought, f_)
122
 
123
+ with open(agent.thought_embedding_path, "wb") as f:
124
+ pickle.dump(agent.thought_embedding, f)
125
+
126
+ with open(agent.profile_path,"w") as f:
127
+ json.dump(agent.profile,f)
128
 
129
 
130
  class ArxivAgent:
 
132
 
133
  self.dataset_path = "./dataset/paper.json"
134
  self.thought_path = "./dataset/thought.json"
135
+ self.trend_idea_path = "./dataset/trend_idea.json"
136
+ self.profile_path = "./dataset/profile.json"
137
 
138
  self.embedding_path = "./dataset/paper_embedding.pkl"
139
  self.thought_embedding_path = './dataset/thought_embedding.pkl'
 
142
  self.today = datetime.datetime.now().strftime("%m/%d/%Y")
143
 
144
  self.newest_day = ""
145
+ self.load_cache()
146
+
147
  self.download()
148
  try:
149
  thread6.run_threaded(dailyDownload, [self])
150
+ thread6.run_threaded(dailySave, [self])
151
  except:
152
  print("Error: unable to start thread")
153
+
 
154
  def edit_profile(self, profile, author_name):
155
+
156
+ self.profile[author_name]=profile
157
+
 
 
 
 
158
  return "Successfully edit profile!"
159
 
160
  def get_profile(self, author_name):
161
  if author_name == "": return None
162
+
 
163
  profile = self.get_arxiv_data_by_author(author_name)
164
  return profile
165
  def select_date(self, method, profile_input):
 
195
  data_chunk_embedding=chunk_embedding_date
196
  profile = profile_input
197
 
198
+ key_update = list(self.paper.keys())[-1]
199
+ isQuery = False
200
+ if profile in self.trend_idea:
201
+ if key_update in self.trend_idea[profile]:
202
+ if method in self.trend_idea[profile][key_update]:
203
+ trend = self.trend_idea[profile][key_update][method]["trend"]
204
+ reference = self.trend_idea[profile][key_update][method]["reference"]
205
+ idea = self.trend_idea[profile][key_update][method]["idea"]
206
+ isQuery = True
207
 
208
  # import pdb
209
  # pdb.set_trace()
210
+ if not(isQuery):
211
+ trend, paper_link = summarize_research_field(profile, "Machine Learning", dataset,data_chunk_embedding) # trend
212
+ reference = papertitleAndLink(paper_link)
213
+ idea = generate_ideas(trend) # idea
214
+ if profile in self.trend_idea:
215
+ if key_update in self.trend_idea[profile]:
216
+ if not(method in self.trend_idea[profile][key_update]):
217
+ self.trend_idea[profile][key_update][method] = {}
218
+ else:
219
+ self.trend_idea[profile][key_update] = {}
220
+ self.trend_idea[profile][key_update][method] = {}
221
+ else:
222
+ self.trend_idea[profile] = {}
223
+ self.trend_idea[profile][key_update] = {}
224
+ self.trend_idea[profile][key_update][method] = {}
225
 
226
+ self.trend_idea[profile][key_update][method]["trend"] = trend
227
+ self.trend_idea[profile][key_update][method]["reference"] = reference
228
+ self.trend_idea[profile][key_update][method]["idea"] = idea
229
+
230
+
231
+
232
  if key_update not in self.thought:
233
  self.thought[key_update] = []
234
  if key_update not in self.thought_embedding:
 
238
  self.thought_embedding[key_update].append(get_bert_embedding([trend])[0])
239
  self.thought[key_update].append(idea[0])
240
  self.thought_embedding[key_update].append(get_bert_embedding([idea])[0])
 
 
 
 
 
 
 
 
 
241
 
242
  return trend, reference, idea
243
 
244
  def response(self, data, profile_input):
 
245
 
 
246
  query = [data]
247
  profile = profile_input
248
 
 
337
 
338
 
339
 
340
+ def load_cache(self):
341
  filename = self.feedback_path
342
 
343
  if os.path.exists(filename):
 
352
  m = {}
353
  self.feedback = m.copy()
354
 
355
+ filename = self.trend_idea_path
356
 
357
+ if os.path.exists(filename):
358
+ with open(filename,"rb") as f:
359
+ content = f.read()
360
+ if not content:
361
+ m = {}
362
+ else:
363
+ m = json.loads(content)
364
+ else:
365
+ with open(filename, mode='w', encoding='utf-8') as ff:
366
+ m = {}
367
+ self.trend_idea = m.copy()
368
+
369
+ filename = self.profile_path
370
+ if os.path.exists(filename):
371
+ with open(filename,"rb") as f:
372
+ content = f.read()
373
+ if not content:
374
+ m = {}
375
+ else:
376
+ m = json.loads(content)
377
+ else:
378
+ with open(filename, mode='w', encoding='utf-8') as ff:
379
+ m = {}
380
+ self.profile = m.copy()
381
 
 
382
  filename = self.thought_path
383
  filename_emb = self.thought_embedding_path
 
384
  if os.path.exists(filename):
385
  with open(filename,"rb") as f:
386
  content = f.read()
 
392
  with open(filename, mode='w', encoding='utf-8') as ff:
393
  m = {}
394
 
 
395
  if os.path.exists(filename_emb):
396
  with open(filename_emb,"rb") as f:
397
  content = f.read()
 
409
 
410
 
411
 
412
+
 
 
 
 
 
413
  def update_feedback_thought(self, query, ansA, ansB, feedbackA, feedbackB):
414
  try:
415
  thread6.run_threaded(feedback_thought, [self, query, ansA, ansB, feedbackA, feedbackB])
 
451
 
452
 
453
 
454
+ if author_name in self.profile: return self.profile[author_name]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
 
456
  author_query = author_name.replace(" ", "+")
457
  url = f"http://export.arxiv.org/api/query?search_query=au:{author_query}&start=0&max_results=300" # Adjust max_results if needed
 
535
  # pdb.set_trace()
536
  personal_info = "; ".join([f"{details['Title & Abstract']}" for details in papers_list])
537
  info = summarize_research_direction(personal_info)
538
+ self.profile[author_name] = info
 
 
 
539
 
540
+ return self.profile[author_name]
541
 
542
  else:
 
543
  return None
544
 
545