sabazo commited on
Commit
feae37f
2 Parent(s): 647a5e9 c8fe20b

Merge pull request #3 from almutareb/reference_parser

Browse files
Files changed (2) hide show
  1. mixtral_agent.py +116 -51
  2. requirements.txt +183 -0
mixtral_agent.py CHANGED
@@ -12,9 +12,15 @@ from langchain.agents.format_scratchpad import format_log_to_str
12
  from langchain.agents.output_parsers import (
13
  ReActJsonSingleInputOutputParser,
14
  )
 
 
 
 
 
15
  from langchain.tools.render import render_text_description
16
  import os
17
 
 
18
  import dotenv
19
 
20
  dotenv.load_dotenv()
@@ -26,39 +32,92 @@ OLLMA_BASE_URL = os.getenv("OLLMA_BASE_URL")
26
  # supports many more optional parameters. Hover on your `ChatOllama(...)`
27
  # class to view the latest available supported parameters
28
  llm = ChatOllama(
29
- model="mistral",
30
  base_url= OLLMA_BASE_URL
31
  )
32
  prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
33
 
34
- # using LangChain Expressive Language chain syntax
35
- # learn more about the LCEL on
36
- # https://python.langchain.com/docs/expression_language/why
37
- chain = prompt | llm | StrOutputParser()
38
-
39
- # for brevity, response is printed in terminal
40
- # You can use LangServe to deploy your application for
41
- # production
42
- print(chain.invoke({"topic": "Space travel"}))
43
-
44
- retriever = ArxivRetriever(load_max_docs=2)
45
-
46
- tools = [
47
- create_retriever_tool(
48
- retriever,
49
- "search arxiv's database for",
50
- "Use this to recomend the user a paper to read Unless stated please choose the most recent models",
51
- # "Searches and returns excerpts from the 2022 State of the Union.",
52
- ),
53
-
54
- Tool(
55
- name="SerpAPI",
56
- description="A low-cost Google Search API. Useful for when you need to answer questions about current events. Input should be a search query.",
57
- func=SerpAPIWrapper().run,
58
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- ]
 
 
 
 
 
 
61
 
 
 
 
 
 
 
 
62
 
63
 
64
  prompt = hub.pull("hwchase17/react-json")
@@ -85,33 +144,39 @@ agent_executor = AgentExecutor(
85
  agent=agent,
86
  tools=tools,
87
  verbose=True,
88
- handle_parsing_errors=True #prevents error
89
  )
90
 
91
- # agent_executor.invoke(
92
- # {
93
- # "input": "Who is the current holder of the speed skating world record on 500 meters? What is her current age raised to the 0.43 power?"
94
- # }
95
- # )
96
-
97
- # agent_executor.invoke(
98
- # {
99
- # "input": "what are large language models and why are they so expensive to run?"
100
- # }
101
- # )
102
 
103
- # agent_executor.invoke(
104
- # {
105
- # "input": "How to generate videos from images using state of the art macchine learning models"
106
- # }
107
- # )
108
 
 
 
 
 
 
 
 
109
 
110
- agent_executor.invoke(
111
- {
112
- "input": "How to generate videos from images using state of the art macchine learning models; Using the axriv retriever " +
113
- "add the urls of the papers used in the final answer using the metadata from the retriever"
114
- # f"Please prioritize the newest papers this is the current data {get_current_date()}"
115
- }
116
- )
 
 
 
 
 
 
 
 
 
 
117
 
 
12
  from langchain.agents.output_parsers import (
13
  ReActJsonSingleInputOutputParser,
14
  )
15
+ # Import things that are needed generically
16
+ from langchain.pydantic_v1 import BaseModel, Field
17
+ from langchain.tools import BaseTool, StructuredTool, tool
18
+ from typing import List, Dict
19
+ from datetime import datetime
20
  from langchain.tools.render import render_text_description
21
  import os
22
 
23
+
24
  import dotenv
25
 
26
  dotenv.load_dotenv()
 
32
  # supports many more optional parameters. Hover on your `ChatOllama(...)`
33
  # class to view the latest available supported parameters
34
# Chat model served by the configured Ollama instance; the instruct-tuned
# Mistral variant follows ReAct-style prompting more reliably.
llm = ChatOllama(
    model="mistral:instruct",
    base_url=OLLMA_BASE_URL,
)

# Simple demo prompt (exercised via the LCEL chain elsewhere in the file).
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")

# Shared arXiv retriever used by the arxiv_search tool below.
arxiv_retriever = ArxivRetriever(load_max_docs=2)
41
+
42
+
43
+
44
def format_info_list(info_list: List[Dict[str, str]]) -> str:
    """
    Format a list of dictionaries containing information into a single string.

    Args:
        info_list (List[Dict[str, str]]): A list of dictionaries containing
            information. Values that are `date`/`datetime` instances are
            rendered as 'YYYY-MM-DD'.

    Returns:
        str: One '|'-delimited line per dictionary, joined by newlines
            (empty string for an empty list).
    """
    # Local import: the module top only does `from datetime import datetime`,
    # so the original `isinstance(value, datetime.date)` passed the *method
    # descriptor* `datetime.date` as the second argument and raised TypeError.
    # `date` is the correct class, and `datetime` subclasses it.
    from datetime import date

    formatted_strings = []
    for info_dict in info_list:
        parts = []
        for key, value in info_dict.items():
            if isinstance(value, date):
                value = value.strftime('%Y-%m-%d')
            parts.append(f"'{key}': '{value}'")
        formatted_strings.append("|" + ", ".join(parts) + "|")
    return '\n'.join(formatted_strings)
64
+
65
@tool
def arxiv_search(query: str) -> str:
    """Using the arxiv search and collects metadata."""
    # Accumulate every paper's metadata into the module-level source log
    # so the final answer can cite URLs.
    global all_sources
    documents = arxiv_retriever.invoke(query)
    metadata_entries = [doc.metadata for doc in documents]
    all_sources += metadata_entries
    # The agent consumes plain text, so return the repr of the metadata list.
    return str(metadata_entries)
81
+
82
@tool
def google_search(query: str) -> str:
    """Using the google search and collects metadata."""
    # Accumulate formatted result strings into the module-level source log
    # so the final answer can cite URLs.
    global all_sources

    wrapper = SerpAPIWrapper()
    search_results: dict = wrapper.results(query)

    # Robustness fix: 'organic_results' is absent for zero-hit queries, and
    # individual results sometimes lack 'snippet' (or rarely 'link'), so the
    # original `search_results['organic_results']` / `.format(**i)` raised
    # KeyError mid-run. Use .get() so a sparse result doesn't crash the tool.
    organic_results = search_results.get('organic_results', [])
    cleaner_sources = [
        "Title: {}, link: {}, snippet: {}".format(
            entry.get('title', ''),
            entry.get('link', ''),
            entry.get('snippet', ''),
        )
        for entry in organic_results
    ]

    all_sources += cleaner_sources

    # The agent consumes plain text, so return the repr of the list.
    return str(cleaner_sources)
100
+
101
+
102
+
103
+
104
# Tool set exposed to the ReAct agent (replaces the earlier
# create_retriever_tool / Tool(SerpAPI) configuration).
tools = [arxiv_search, google_search]
121
 
122
 
123
  prompt = hub.pull("hwchase17/react-json")
 
144
  agent=agent,
145
  tools=tools,
146
  verbose=True,
147
+ # handle_parsing_errors=True #prevents error
148
  )
149
 
150
+
 
 
 
 
 
 
 
 
 
 
151
 
152
+ if __name__ == "__main__":
153
+
154
+ # global variable for collecting sources
155
+ all_sources = []
 
156
 
157
+ input = agent_executor.invoke(
158
+ {
159
+ "input": "How to generate videos from images using state of the art macchine learning models; Using the axriv retriever " +
160
+ "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' "
161
+ # f"Please prioritize the newest papers this is the current data {get_current_date()}"
162
+ }
163
+ )
164
 
165
+ # input_1 = agent_executor.invoke(
166
+ # {
167
+ # "input": "I am looking for a text to 3d model; Using the axriv retriever " +
168
+ # "add the urls of the papers used in the final answer using the metadata from the retriever"
169
+ # # f"Please prioritize the newest papers this is the current data {get_current_date()}"
170
+ # }
171
+ # )
172
+
173
+ # input_1 = agent_executor.invoke(
174
+ # {
175
+ # "input": "I am looking for a text to 3d model; Using the google search tool " +
176
+ # "add the urls in the final answer using the metadata from the retriever, also provid a summary of the searches"
177
+ # # f"Please prioritize the newest papers this is the current data {get_current_date()}"
178
+ # }
179
+ # )
180
+
181
+ x = 0
182
 
requirements.txt CHANGED
@@ -189,3 +189,186 @@ websockets==11.0.3
189
  wrapt==1.16.0
190
  yarl==1.9.4
191
  zipp==3.17.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  wrapt==1.16.0
190
  yarl==1.9.4
191
  zipp==3.17.0
192
+ aiofiles==23.2.1
193
+ aiohttp==3.9.3
194
+ aiosignal==1.3.1
195
+ altair==5.2.0
196
+ annotated-types==0.6.0
197
+ anyio==4.2.0
198
+ arxiv==2.1.0
199
+ asgiref==3.7.2
200
+ async-timeout==4.0.3
201
+ attrs==23.2.0
202
+ backoff==2.2.1
203
+ bcrypt==4.1.2
204
+ beautifulsoup4==4.12.3
205
+ boto3==1.34.42
206
+ botocore==1.34.42
207
+ build==1.0.3
208
+ cachetools==5.3.2
209
+ certifi==2024.2.2
210
+ chardet==5.2.0
211
+ charset-normalizer==3.3.2
212
+ chroma-hnswlib==0.7.3
213
+ chromadb==0.4.22
214
+ click==8.1.7
215
+ coloredlogs==15.0.1
216
+ contourpy==1.2.0
217
+ cycler==0.12.1
218
+ dataclasses-json==0.6.4
219
+ dataclasses-json-speakeasy==0.5.11
220
+ Deprecated==1.2.14
221
+ emoji==2.10.1
222
+ exceptiongroup==1.2.0
223
+ faiss-cpu==1.7.4
224
+ fastapi==0.109.2
225
+ feedparser==6.0.10
226
+ ffmpy==0.3.2
227
+ filelock==3.13.1
228
+ filetype==1.2.0
229
+ flatbuffers==23.5.26
230
+ fonttools==4.48.1
231
+ frozenlist==1.4.1
232
+ fsspec==2024.2.0
233
+ gitdb==4.0.11
234
+ GitPython==3.1.41
235
+ google-auth==2.27.0
236
+ google_search_results==2.4.2
237
+ googleapis-common-protos==1.62.0
238
+ gradio==3.48.0
239
+ gradio_client==0.6.1
240
+ greenlet==3.0.3
241
+ grpcio==1.60.1
242
+ h11==0.14.0
243
+ httpcore==1.0.3
244
+ httptools==0.6.1
245
+ httpx==0.26.0
246
+ huggingface-hub==0.20.3
247
+ humanfriendly==10.0
248
+ idna==3.6
249
+ importlib-metadata==6.11.0
250
+ importlib-resources==6.1.1
251
+ Jinja2==3.1.3
252
+ jmespath==1.0.1
253
+ joblib==1.3.2
254
+ jsonpatch==1.33
255
+ jsonpath-python==1.0.6
256
+ jsonpointer==2.4
257
+ jsonschema==4.21.1
258
+ jsonschema-specifications==2023.12.1
259
+ kiwisolver==1.4.5
260
+ kubernetes==29.0.0
261
+ langchain==0.1.7
262
+ langchain-community==0.0.20
263
+ langchain-core==0.1.23
264
+ langchainhub==0.1.14
265
+ langdetect==1.0.9
266
+ langsmith==0.0.87
267
+ lxml==5.1.0
268
+ MarkupSafe==2.1.5
269
+ marshmallow==3.20.2
270
+ matplotlib==3.8.3
271
+ mmh3==4.1.0
272
+ monotonic==1.6
273
+ mpmath==1.3.0
274
+ multidict==6.0.5
275
+ mypy-extensions==1.0.0
276
+ networkx==3.2.1
277
+ nltk==3.8.1
278
+ numpy==1.26.4
279
+ nvidia-cublas-cu12==12.1.3.1
280
+ nvidia-cuda-cupti-cu12==12.1.105
281
+ nvidia-cuda-nvrtc-cu12==12.1.105
282
+ nvidia-cuda-runtime-cu12==12.1.105
283
+ nvidia-cudnn-cu12==8.9.2.26
284
+ nvidia-cufft-cu12==11.0.2.54
285
+ nvidia-curand-cu12==10.3.2.106
286
+ nvidia-cusolver-cu12==11.4.5.107
287
+ nvidia-cusparse-cu12==12.1.0.106
288
+ nvidia-nccl-cu12==2.19.3
289
+ nvidia-nvjitlink-cu12==12.3.101
290
+ nvidia-nvtx-cu12==12.1.105
291
+ oauthlib==3.2.2
292
+ onnxruntime==1.17.0
293
+ opentelemetry-api==1.22.0
294
+ opentelemetry-exporter-otlp-proto-common==1.22.0
295
+ opentelemetry-exporter-otlp-proto-grpc==1.22.0
296
+ opentelemetry-instrumentation==0.43b0
297
+ opentelemetry-instrumentation-asgi==0.43b0
298
+ opentelemetry-instrumentation-fastapi==0.43b0
299
+ opentelemetry-proto==1.22.0
300
+ opentelemetry-sdk==1.22.0
301
+ opentelemetry-semantic-conventions==0.43b0
302
+ opentelemetry-util-http==0.43b0
303
+ orjson==3.9.14
304
+ overrides==7.7.0
305
+ packaging==23.2
306
+ pandas==2.2.0
307
+ pillow==10.2.0
308
+ posthog==3.4.1
309
+ protobuf==4.25.2
310
+ pulsar-client==3.4.0
311
+ pyasn1==0.5.1
312
+ pyasn1-modules==0.3.0
313
+ pydantic==2.6.1
314
+ pydantic_core==2.16.2
315
+ pydub==0.25.1
316
+ pyparsing==3.1.1
317
+ PyPika==0.48.9
318
+ pyproject_hooks==1.0.0
319
+ python-dateutil==2.8.2
320
+ python-dotenv==1.0.1
321
+ python-iso639==2024.2.7
322
+ python-magic==0.4.27
323
+ python-multipart==0.0.9
324
+ pytz==2024.1
325
+ PyYAML==6.0.1
326
+ rapidfuzz==3.6.1
327
+ referencing==0.33.0
328
+ regex==2023.12.25
329
+ requests==2.31.0
330
+ requests-oauthlib==1.3.1
331
+ rpds-py==0.18.0
332
+ rsa==4.9
333
+ s3transfer==0.10.0
334
+ safetensors==0.4.2
335
+ scikit-learn==1.4.0
336
+ scipy==1.12.0
337
+ semantic-version==2.10.0
338
+ sentence-transformers==2.3.1
339
+ sentencepiece==0.1.99
340
+ sgmllib3k==1.0.0
341
+ six==1.16.0
342
+ smmap==5.0.1
343
+ sniffio==1.3.0
344
+ soupsieve==2.5
345
+ SQLAlchemy==2.0.27
346
+ starlette==0.36.3
347
+ sympy==1.12
348
+ tabulate==0.9.0
349
+ tenacity==8.2.3
350
+ threadpoolctl==3.3.0
351
+ tokenizers==0.15.2
352
+ tomli==2.0.1
353
+ toolz==0.12.1
354
+ torch==2.2.0
355
+ tqdm==4.66.2
356
+ transformers==4.37.2
357
+ triton==2.2.0
358
+ typer==0.9.0
359
+ types-requests==2.31.0.20240125
360
+ typing-inspect==0.9.0
361
+ typing_extensions==4.8.0
362
+ tzdata==2024.1
363
+ unstructured==0.12.4
364
+ unstructured-client==0.18.0
365
+ urllib3==2.0.7
366
+ uvicorn==0.27.1
367
+ uvloop==0.19.0
368
+ validators==0.22.0
369
+ watchfiles==0.21.0
370
+ websocket-client==1.7.0
371
+ websockets==11.0.3
372
+ wrapt==1.16.0
373
+ yarl==1.9.4
374
+ zipp==3.17.0