{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyM4ysnzp2PKemzKgh131B0g",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "\"Open"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ydmVy2pS_hwU"
      },
      "outputs": [],
      "source": [
        "# Install dependencies. %pip targets the environment of the running kernel.\n",
        "# NOTE(review): -U pulls the newest release of every package except gradio, while the\n",
        "# imports below look like the langchain 0.1-era module layout - consider pinning versions.\n",
        "%pip install -qU langchain_community\n",
        "%pip install -qU langchain\n",
        "%pip install -qU google-search-results\n",
        "%pip install -qU langchainhub\n",
        "%pip install -qU text_generation\n",
        "%pip install -qU arxiv\n",
        "%pip install -qU wikipedia\n",
        "%pip install -qU gradio==3.48.0\n",
        "%pip install -qU youtube_search\n",
        "%pip install -qU sentence_transformers\n",
        "%pip install -qU chromadb"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "from google.colab import userdata\n",
        "\n",
        "# Credentials are read from the Colab secrets store rather than hard-coded.\n",
        "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = userdata.get('HUGGINGFACEHUB_API_TOKEN')\n",
        "#os.environ[\"SERPAPI_API_KEY\"] = userdata.get('SERPAPI_API_KEY')\n",
        "os.environ[\"GOOGLE_CSE_ID\"] = userdata.get('GOOGLE_CSE_ID')\n",
        "os.environ[\"GOOGLE_API_KEY\"] = userdata.get('GOOGLE_API_KEY')\n",
        "\n",
        "# Tracing configuration (endpoint, key and project name).\n",
        "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
        "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
        "os.environ[\"LANGCHAIN_API_KEY\"] = userdata.get('LANGCHAIN_API_KEY')\n",
        "os.environ[\"LANGCHAIN_PROJECT\"] = \"arxiv_ollama_agent\""
      ],
      "metadata": {
        "id": "JYt3cFVnQiPe"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain.tools import WikipediaQueryRun\n",
        "from langchain_community.utilities import WikipediaAPIWrapper\n",
        "\n",
        "from langchain.tools import Tool\n",
        "from langchain_community.utilities import GoogleSearchAPIWrapper"
      ],
      "metadata": {
        "id": "bpb1dYzBZsRR"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Wikipedia tool; it is added to the agent's tool list further down.\n",
        "api_wrapper = WikipediaAPIWrapper()\n",
        "wikipedia = WikipediaQueryRun(api_wrapper=api_wrapper)"
      ],
      "metadata": {
        "id": "_NAkY8FkMHcx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "wikipedia.run(\"large language model\")"
      ],
      "metadata": {
        "id": "ADu6renzI3bi"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "websearch = GoogleSearchAPIWrapper()\n",
        "\n",
        "\n",
        "def top5_results(query):\n",
        "    \"\"\"Return `websearch.results` for `query`, asking for five results.\"\"\"\n",
        "    return websearch.results(query, 5)\n",
        "\n",
        "\n",
        "google_search = Tool(\n",
        "    name=\"google_search\",\n",
        "    description=\"Search Google for recent results.\",\n",
        "    # Alternative: func=top5_results\n",
        "    func=websearch.run,\n",
        ")"
      ],
      "metadata": {
        "id": "QtWQgcDpblGx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "google_search.run(\"large language model\")"
      ],
      "metadata": {
        "id": "IVAbbQ04ZE9M"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "wikipedia.args"
      ],
      "metadata": {
        "id": "Cv2z8MFNJ3sD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# HF libraries\n",
        "from langchain.llms import HuggingFaceHub\n",
        "\n",
        "# Load the model from the Hugging Face Hub.\n",
        "# Note: despite its name, `model_id` holds the LLM object, not a repo id string.\n",
        "model_id = HuggingFaceHub(\n",
        "    repo_id=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
        "    model_kwargs={\n",
        "        \"temperature\": 0.1,\n",
        "        \"max_new_tokens\": 1024,\n",
        "        \"repetition_penalty\": 1.2,\n",
        "        \"return_full_text\": False,\n",
        "    },\n",
        ")"
      ],
      "metadata": {
        "id": "JHO0Hr5phBLH"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain import hub\n",
        "from langchain.agents import AgentExecutor, create_react_agent, load_tools\n",
        "from langchain.tools.render import render_text_description\n",
        "from langchain.tools.retriever import create_retriever_tool\n",
        "from langchain.retrievers import ArxivRetriever\n",
        "from langchain.agents.format_scratchpad import format_log_to_str\n",
        "from langchain.agents.output_parsers import (\n",
        "    ReActJsonSingleInputOutputParser,\n",
        ")\n",
        "from langchain.tools import YouTubeSearchTool\n",
        "\n",
        "from langchain_community.chat_message_histories import ChatMessageHistory\n",
        "from langchain_core.runnables.history import RunnableWithMessageHistory\n",
        "\n",
        "# One in-memory history object, shared by every session (see the lambda below).\n",
        "message_history = ChatMessageHistory()\n",
        "\n",
        "arxiv_retriever = ArxivRetriever()\n",
        "\n",
        "arxiv_search = create_retriever_tool(\n",
        "    arxiv_retriever,\n",
        "    \"arxiv_database\",\n",
        "    \"Search arxiv database for scientific research papers and studies\",\n",
        ")\n",
        "\n",
        "# Not part of `tools`; only used by the you_four() helper in a later cell.\n",
        "youtube_search = YouTubeSearchTool()\n",
        "\n",
        "tools = [arxiv_search, wikipedia, google_search]\n",
        "\n",
        "# Alternative prompt: hub.pull(\"hwchase17/react\")\n",
        "prompt = hub.pull(\"hwchase17/react-json\")\n",
        "prompt = prompt.partial(\n",
        "    tools=render_text_description(tools),\n",
        "    tool_names=\", \".join([t.name for t in tools]),\n",
        ")\n",
        "\n",
        "# Stop generating at \"\\nObservation\" so the executor, not the model, supplies tool output.\n",
        "chat_model_with_stop = model_id.bind(stop=[\"\\nObservation\"])\n",
        "agent = (\n",
        "    {\n",
        "        \"input\": lambda x: x[\"input\"],\n",
        "        \"agent_scratchpad\": lambda x: format_log_to_str(x[\"intermediate_steps\"]),\n",
        "    }\n",
        "    | prompt\n",
        "    | chat_model_with_stop\n",
        "    | ReActJsonSingleInputOutputParser()\n",
        ")\n",
        "\n",
        "# Alternative construction: agent = create_react_agent(model_id, tools, prompt)\n",
        "agent_executor = AgentExecutor(\n",
        "    agent=agent,\n",
        "    tools=tools,\n",
        "    verbose=True,\n",
        "    max_iterations=10,  # cap number of iterations\n",
        "    # max_execution_time=60,  # timeout at 60 sec\n",
        "    return_intermediate_steps=True,\n",
        "    handle_parsing_errors=True,\n",
        ")\n",
        "\n",
        "\n",
        "def stream_output(query):\n",
        "    \"\"\"Stream an agent run for `query`, printing tool calls, observations and the final answer.\n",
        "\n",
        "    Nothing is returned; every chunk yielded by `agent_executor.stream` is printed.\n",
        "    \"\"\"\n",
        "    for chunk in agent_executor.stream({\"input\": query}):\n",
        "        # Agent Action\n",
        "        if \"actions\" in chunk:\n",
        "            for action in chunk[\"actions\"]:\n",
        "                print(\n",
        "                    f\"Calling Tool ```{action.tool}``` with input ```{action.tool_input}```\"\n",
        "                )\n",
        "        # Observation\n",
        "        elif \"steps\" in chunk:\n",
        "            for step in chunk[\"steps\"]:\n",
        "                print(f\"Got result: ```{step.observation}```\")\n",
        "        # Final result (previously dropped silently)\n",
        "        elif \"output\" in chunk:\n",
        "            print(f\"Final Output: {chunk['output']}\")\n",
        "\n",
        "\n",
        "# Chat memory not working yet: the wrapper is given message_history, but the agent\n",
        "# mapping above only forwards \"input\" and \"agent_scratchpad\", so the injected\n",
        "# \"chat_history\" never reaches the prompt.\n",
        "agent_with_chat_history = RunnableWithMessageHistory(\n",
        "    agent_executor,\n",
        "    # The session id is ignored: every session shares the same history object.\n",
        "    lambda session_id: message_history,\n",
        "    input_messages_key=\"input\",\n",
        "    history_messages_key=\"chat_history\",\n",
        ")"
      ],
      "metadata": {
        "id": "D4Gj_dZtgzci"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "stream_output(\"what is corrective retrieval augmeneted generation\")"
      ],
      "metadata": {
        "id": "ItAD-n6BnTc6"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "## Youtube search tool, not used yet\n",
        "import ast\n",
        "\n",
        "\n",
        "def you_four(query):\n",
        "    \"\"\"Run the YouTube search tool for `query` and return iframe snippets for the hits.\n",
        "\n",
        "    ',4' is appended to the query before it is passed to the tool\n",
        "    (presumably the tool's 'query,count' convention - verify against the tool docs).\n",
        "    \"\"\"\n",
        "    fquery = query + ',4'\n",
        "    videos_str = youtube_search.run(fquery)\n",
        "    return convert_urls(videos_str)\n",
        "\n",
        "\n",
        "def convert_urls(urls):\n",
        "    \"\"\"Convert a stringified list of watch URLs into a list of <iframe> HTML snippets.\n",
        "\n",
        "    `urls` is the string representation of a Python list; each entry has\n",
        "    'watch?v=' rewritten to 'embed/' and is wrapped in an iframe tag.\n",
        "    \"\"\"\n",
        "    # literal_eval parses the list literal without executing arbitrary code.\n",
        "    watch_urls = ast.literal_eval(urls)\n",
        "    iframes = []\n",
        "    for url in watch_urls:\n",
        "        embed_url = url.replace('watch?v=', 'embed/')\n",
        "        # NOTE(review): the iframe markup was empty before, so embed_url was never used;\n",
        "        # the width/height below are placeholders - adjust as needed.\n",
        "        iframes.append(\n",
        "            f'<iframe width=\"560\" height=\"315\" src=\"{embed_url}\" frameborder=\"0\" allowfullscreen></iframe>'\n",
        "        )\n",
        "    return iframes"
      ],
      "metadata": {
        "id": "3LzQEqeTzH0L"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "list_d=you_four(\"air taxi\")\n",
        "list_d"
      ],
      "metadata": {
        "id": "T7I6eIh318rU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "agent_with_chat_history.invoke(\n",
        "    {\"input\": \"hi! I'm bob\"},\n",
        "    # This is needed because in most real world scenarios, a session id is needed\n",
        "    # It isn't really used here because we are using a simple in memory ChatMessageHistory\n",
        "    config={\"configurable\": {\"session_id\": \"\"}},\n",
        ")"
      ],
      "metadata": {
        "id": "7MxiaD6qffZG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "agent_with_chat_history.invoke(\n",
        "    {\"input\": \"what's my name?\"},\n",
        "    # This is needed because in most real world scenarios, a session id is needed\n",
        "    # It isn't really used here because we are using a simple in memory ChatMessageHistory\n",
        "    config={\"configurable\": {\"session_id\": \"\"}},\n",
        ")"
      ],
      "metadata": {
        "id": "5cjo4j2nfkbQ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "return_txt= agent_executor.invoke(\n",
        "    {\n",
        "        \"input\": \"how could a concept for an airtaxi fleet management look like?\",\n",
        "    }\n",
        ")"
      ],
      "metadata": {
        "id": "-q81PaZijPvO"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "agent_executor.invoke(\n",
        "    {\n",
        "        \"input\": \"What's the latest paper on corrective retrieval augmeneted generation?\"\n",
        "    }\n",
        ")"
      ],
      "metadata": {
        "id": "GCAOXXdPJ_wL"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import gradio as gr\n",
        "\n",
        "\n",
        "def add_text(history, text):\n",
        "    \"\"\"Append the user's message as a new chat row and clear the textbox.\n",
        "\n",
        "    Returns the extended history and an empty string for the question box.\n",
        "    \"\"\"\n",
        "    # A list (not a tuple) so that bot() can fill in the reply slot in place.\n",
        "    history = history + [[text, None]]\n",
        "    return history, \"\"\n",
        "\n",
        "\n",
        "def bot(history):\n",
        "    \"\"\"Answer the most recent user message and store the reply in its chat row.\"\"\"\n",
        "    response = infer(history[-1][0], history)\n",
        "    history[-1][1] = response['output']\n",
        "    return history\n",
        "\n",
        "\n",
        "def infer(question, history):\n",
        "    \"\"\"Invoke the agent executor on `question` and return its result dict.\n",
        "\n",
        "    `history` is accepted for the caller's convenience but is not used.\n",
        "    \"\"\"\n",
        "    return agent_executor.invoke(\n",
        "        {\n",
        "            \"input\": question,\n",
        "        }\n",
        "    )\n",
        "\n",
        "\n",
        "def you_frame(question):\n",
        "    \"\"\"Return the iframe snippets from you_four(question) joined by newlines.\"\"\"\n",
        "    iframes = you_four(question)\n",
        "    return '\\n'.join(iframes)\n",
        "\n",
        "\n",
        "def vote(data: gr.LikeData):\n",
        "    \"\"\"Print whether the user up- or down-voted a response.\"\"\"\n",
        "    # f-strings keep this from raising if data.value is not a plain string.\n",
        "    if data.liked:\n",
        "        print(f\"You upvoted this response: {data.value}\")\n",
        "    else:\n",
        "        print(f\"You downvoted this response: {data.value}\")\n",
        "\n",
        "\n",
        "css=\"\"\"\n",
        "#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}\n",
        "\"\"\"\n",
        "\n",
        "# Header HTML shown above the chat; currently blank.\n",
        "title = \"\"\"\n",
        "\n",
        "\"\"\"\n",
        "\n",
        "# css is passed in so the #col-container rule above applies to the column below.\n",
        "with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:\n",
        "    with gr.Tab(\"Google|Wikipedia|Arxiv\"):\n",
        "        with gr.Column(elem_id=\"col-container\"):\n",
        "            gr.HTML(title)\n",
        "            with gr.Row():\n",
        "                question = gr.Textbox(\n",
        "                    label=\"Question\",\n",
        "                    placeholder=\"Type your question and hit Enter \",\n",
        "                )\n",
        "            chatbot = gr.Chatbot([], elem_id=\"chatbot\")\n",
        "            chatbot.like(vote, None, None)\n",
        "            clear = gr.Button(\"Clear\")\n",
        "        # First echo the question into the chat, then let the agent answer it.\n",
        "        question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(\n",
        "            bot, chatbot, chatbot\n",
        "        )\n",
        "        clear.click(lambda: None, None, chatbot, queue=False)\n",
        "\n",
        "demo.queue()\n",
        "demo.launch(debug=True)"
      ],
      "metadata": {
        "id": "J7xy7c2LcEbe"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}