kenken999 committed on
Commit
73eedaf
·
1 Parent(s): e91d22b
babyagi/babyagi.py CHANGED
@@ -20,7 +20,16 @@ from groq import Groq
20
 
21
  # default opt out of chromadb telemetry.
22
  from chromadb.config import Settings
 
 
 
23
 
 
 
 
 
 
 
24
  client = chromadb.Client(Settings(anonymized_telemetry=False))
25
 
26
  # Engine configuration
@@ -43,7 +52,8 @@ COOPERATIVE_MODE = "none"
43
  JOIN_EXISTING_OBJECTIVE = False
44
 
45
  # Goal configuration
46
- OBJECTIVE = os.getenv("OBJECTIVE", "")
 
47
  INITIAL_TASK = os.getenv("INITIAL_TASK", os.getenv("FIRST_TASK", ""))
48
 
49
  # Model configuration
@@ -182,7 +192,11 @@ class LlamaEmbeddingFunction(EmbeddingFunction):
182
  def __call__(self, texts: Documents) -> Embeddings:
183
  embeddings = []
184
  for t in texts:
185
- e = llm_embed.embed(t)
 
 
 
 
186
  embeddings.append(e)
187
  return embeddings
188
 
@@ -200,24 +214,46 @@ class DefaultResultsStorage:
200
  )
201
 
202
  metric = "cosine"
203
- if LLM_MODEL.startswith("llama"):
204
- embedding_function = LlamaEmbeddingFunction()
205
- else:
206
- embedding_function = OpenAIEmbeddingFunction(api_key=OPENAI_API_KEY)
207
  self.collection = chroma_client.get_or_create_collection(
208
  name=RESULTS_STORE_NAME,
209
  metadata={"hnsw:space": metric},
210
  embedding_function=embedding_function,
211
  )
212
 
 
 
213
  def add(self, task: Dict, result: str, result_id: str):
214
 
215
  # Break the function if LLM_MODEL starts with "human" (case-insensitive)
216
  if LLM_MODEL.startswith("human"):
217
  return
 
 
218
  # Continue with the rest of the function
219
-
220
- embeddings = llm_embed.embed(result) if LLM_MODEL.startswith("llama") else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  if (
222
  len(self.collection.get(ids=[result_id], include=[])["ids"]) > 0
223
  ): # Check if the result already exists
@@ -337,34 +373,35 @@ def openai_call(
337
  temperature: float = OPENAI_TEMPERATURE,
338
  max_tokens: int = 100,
339
  ):
340
-
341
- messages=[
342
- {
343
- "role": "user",
344
- "content": "prompt"
345
- }
346
- ],
347
- client = Groq(api_key=os.getenv("api_key"))
348
- res = ""
349
- completion = client.chat.completions.create(
350
- model="llama3-8b-8192",
351
- messages=[
352
- {
353
- "role": "user",
354
- "content": prompt
355
- }
356
- ],
357
- temperature=1,
358
- max_tokens=1024,
359
- top_p=1,
360
- stream=True,
361
- stop=None,
362
- )
363
- for chunk in completion:
364
- print(chunk.choices[0].delta.content)
365
- print(chunk.choices[0].delta.content or "", end="")
366
- res += chunk.choices[0].delta.content or ""
367
- return res
 
368
 
369
  while True:
370
 
@@ -474,7 +511,7 @@ The number of each entry must be followed by a period. If your list is empty, wr
474
  Unless your list is empty, do not include any headers before your numbered list or follow your numbered list with any other output."""
475
 
476
  print(f'\n*****TASK CREATION AGENT PROMPT****\n{prompt}\n')
477
- response = openai_call(prompt, max_tokens=2000)
478
  print(f'\n****TASK CREATION AGENT RESPONSE****\n{response}\n')
479
  new_tasks = response.split('\n')
480
  new_tasks_list = []
@@ -584,23 +621,21 @@ def main():
584
  while loop:
585
  # As long as there are tasks in the storage...
586
  if not tasks_storage.is_empty():
 
587
  # Print the task list
588
  print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m")
589
  for t in tasks_storage.get_task_names():
590
  print(" • " + str(t))
591
 
592
-
593
  # Step 1: Pull the first incomplete task
594
  task = tasks_storage.popleft()
595
  print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m")
596
  print(str(task["task_name"]))
597
 
598
-
599
  # Send to execution function to complete the task based on the context
600
  result = execution_agent(OBJECTIVE, str(task["task_name"]))
601
  print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m")
602
  print(result)
603
- return
604
 
605
  # Step 2: Enrich result and store in the results storage
606
  # This is where you should enrich the result if needed
 
20
 
21
  # default opt out of chromadb telemetry.
22
  from chromadb.config import Settings
23
+ from transformers import AutoTokenizer, AutoModel
24
+ import torch
25
+ import numpy
26
 
27
+ # モデル名を指定
28
+ model_name = "sentence-transformers/all-MiniLM-L6-v2"
29
+
30
+ # トークナイザーとモデルをロード
31
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
32
+ model = AutoModel.from_pretrained(model_name)
33
  client = chromadb.Client(Settings(anonymized_telemetry=False))
34
 
35
  # Engine configuration
 
52
  JOIN_EXISTING_OBJECTIVE = False
53
 
54
  # Goal configuration
55
+ #OBJECTIVE = os.getenv("OBJECTIVE", "")
56
+ OBJECTIVE = "ボットの性能をよくする方法 日本語で説明"
57
  INITIAL_TASK = os.getenv("INITIAL_TASK", os.getenv("FIRST_TASK", ""))
58
 
59
  # Model configuration
 
192
  def __call__(self, texts: Documents) -> Embeddings:
193
  embeddings = []
194
  for t in texts:
195
+ #e = llm_embed.embed(t)
196
+ inputs = tokenizer(t, return_tensors="pt")
197
+ outputs = model(**inputs)
198
+ # [CLS]トークンの出力を取得
199
+ e = outputs.last_hidden_state[:,0,:].squeeze().detach().cpu().numpy().tolist()
200
  embeddings.append(e)
201
  return embeddings
202
 
 
214
  )
215
 
216
  metric = "cosine"
217
+ #if LLM_MODEL.startswith("llama"):
218
+ embedding_function = LlamaEmbeddingFunction()
219
+ #else:
220
+ # embedding_function = OpenAIEmbeddingFunction(api_key=OPENAI_API_KEY)
221
  self.collection = chroma_client.get_or_create_collection(
222
  name=RESULTS_STORE_NAME,
223
  metadata={"hnsw:space": metric},
224
  embedding_function=embedding_function,
225
  )
226
 
227
+
228
+
229
  def add(self, task: Dict, result: str, result_id: str):
230
 
231
  # Break the function if LLM_MODEL starts with "human" (case-insensitive)
232
  if LLM_MODEL.startswith("human"):
233
  return
234
+ return
235
+ #from langchain_community.chat_models import ChatOpenAI
236
  # Continue with the rest of the function
237
+ #llm_embed = ChatOpenAI(model_name="lama3-70b-8192",
238
+ # openai_api_key="<REDACTED_GROQ_API_KEY>",
239
+ # openai_api_base="https://api.groq.com/openai/v1",
240
+ # )
241
+ #import openai
242
+ #openai.api_key = "<REDACTED_GROQ_API_KEY>"
243
+ #openai.api_base = "https://api.groq.com/openai/v1"
244
+ #response = openai.embeddings.create(input=result,
245
+ # model="lama3-70b-8192",
246
+ #
247
+ inputs = tokenizer(result, return_tensors="pt")
248
+ outputs = model(**inputs)
249
+ # [CLS]トークンの出力を取得
250
+ embeddings = outputs.last_hidden_state[:,0,:].squeeze().detach().cpu().numpy().tolist()
251
+ #cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze()
252
+ # テンソルが CPU 上にあることを確認し、NumPy 配列に変換
253
+ #cls_embedding_np = cls_embedding.detach().cpu().numpy()
254
+
255
+ #embeddings = response['data'][0]['embedding']
256
+ #embeddings = llm_embed.embed(result) if LLM_MODEL.startswith("llama") else None
257
  if (
258
  len(self.collection.get(ids=[result_id], include=[])["ids"]) > 0
259
  ): # Check if the result already exists
 
373
  temperature: float = OPENAI_TEMPERATURE,
374
  max_tokens: int = 100,
375
  ):
376
+ while True:
377
+ messages=[
378
+ {
379
+ "role": "user",
380
+ "content": "prompt"
381
+ }
382
+ ],
383
+ client = Groq(api_key=os.getenv("api_key"))
384
+ res = ""
385
+ print(prompt)
386
+ completion = client.chat.completions.create(
387
+ model="llama3-8b-8192",
388
+ messages=[
389
+ {
390
+ "role": "user",
391
+ "content": prompt
392
+ }
393
+ ],
394
+ temperature=1,
395
+ max_tokens=1024,
396
+ top_p=1,
397
+ stream=True,
398
+ stop=None,
399
+ )
400
+ for chunk in completion:
401
+ #print(chunk.choices[0].delta.content)
402
+ #print(chunk.choices[0].delta.content or "", end="")
403
+ res += chunk.choices[0].delta.content or ""
404
+ return res
405
 
406
  while True:
407
 
 
511
  Unless your list is empty, do not include any headers before your numbered list or follow your numbered list with any other output."""
512
 
513
  print(f'\n*****TASK CREATION AGENT PROMPT****\n{prompt}\n')
514
+ response = openai_call(prompt, max_tokens=4000)
515
  print(f'\n****TASK CREATION AGENT RESPONSE****\n{response}\n')
516
  new_tasks = response.split('\n')
517
  new_tasks_list = []
 
621
  while loop:
622
  # As long as there are tasks in the storage...
623
  if not tasks_storage.is_empty():
624
+ #OBJECTIVE = "ボットの性能をよくする方法 日本語で説明"
625
  # Print the task list
626
  print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m")
627
  for t in tasks_storage.get_task_names():
628
  print(" • " + str(t))
629
 
 
630
  # Step 1: Pull the first incomplete task
631
  task = tasks_storage.popleft()
632
  print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m")
633
  print(str(task["task_name"]))
634
 
 
635
  # Send to execution function to complete the task based on the context
636
  result = execution_agent(OBJECTIVE, str(task["task_name"]))
637
  print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m")
638
  print(result)
 
639
 
640
  # Step 2: Enrich result and store in the results storage
641
  # This is where you should enrich the result if needed
babyagi/extensions/weaviate_storage.py CHANGED
@@ -6,7 +6,19 @@ from typing import Dict, List
6
  import openai
7
  import weaviate
8
  from weaviate.embedded import EmbeddedOptions
 
 
 
 
 
9
 
 
 
 
 
 
 
 
10
 
11
  def can_import(module_name):
12
  try:
@@ -121,7 +133,11 @@ class WeaviateResultsStorage:
121
  # Get embedding for the text
122
  def get_embedding(self, text: str) -> list:
123
  text = text.replace("\n", " ")
124
-
 
 
 
 
125
  if self.llm_model.startswith("llama"):
126
  from llama_cpp import Llama
127
 
 
6
  import openai
7
  import weaviate
8
  from weaviate.embedded import EmbeddedOptions
9
+ # default opt out of chromadb telemetry.
10
+ from chromadb.config import Settings
11
+ from transformers import AutoTokenizer, AutoModel
12
+ import torch
13
+ import numpy
14
 
15
+ # モデル名を指定
16
+ model_name = "sentence-transformers/all-MiniLM-L6-v2"
17
+
18
+ # トークナイザーとモデルをロード
19
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
20
+ model = AutoModel.from_pretrained(model_name)
21
+ client = chromadb.Client(Settings(anonymized_telemetry=False))
22
 
23
  def can_import(module_name):
24
  try:
 
133
  # Get embedding for the text
134
  def get_embedding(self, text: str) -> list:
135
  text = text.replace("\n", " ")
136
+ inputs = tokenizer(text, return_tensors="pt")
137
+ outputs = model(**inputs)
138
+ # [CLS]トークンの出力を取得
139
+ embeddings = outputs.last_hidden_state[:,0,:].squeeze().detach().cpu().numpy().tolist()
140
+ return embeddings
141
  if self.llm_model.startswith("llama"):
142
  from llama_cpp import Llama
143