{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyM4ysnzp2PKemzKgh131B0g",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/almutareb/InnovationPathfinderAI/blob/main/Copy_of_mixtral_react_agent.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ydmVy2pS_hwU"
      },
      "outputs": [],
      "source": [
        "!pip install -qU langchain_community\n",
        "!pip install -qU langchain\n",
        "!pip install -qU google-search-results\n",
        "!pip install -qU langchainhub\n",
        "!pip install -qU text_generation\n",
        "!pip install -qU arxiv\n",
        "!pip install -qU wikipedia\n",
        "!pip install -qU gradio==3.48.0\n",
        "!pip install -qU youtube_search\n",
        "!pip install -qU sentence_transformers\n",
        "!pip install -qU hromadb"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "from google.colab import userdata\n",
        "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = userdata.get('HUGGINGFACEHUB_API_TOKEN')\n",
        "#os.environ[\"SERPAPI_API_KEY\"] = userdata.get('SERPAPI_API_KEY')\n",
        "os.environ[\"GOOGLE_CSE_ID\"] = userdata.get('GOOGLE_CSE_ID')\n",
        "os.environ[\"GOOGLE_API_KEY\"] = userdata.get('GOOGLE_API_KEY')\n",
        "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
        "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
        "os.environ[\"LANGCHAIN_API_KEY\"] = userdata.get('LANGCHAIN_API_KEY')\n",
        "os.environ[\"LANGCHAIN_PROJECT\"] = \"arxiv_ollama_agent\""
      ],
      "metadata": {
        "id": "JYt3cFVnQiPe"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain.tools import WikipediaQueryRun\n",
        "from langchain_community.utilities import WikipediaAPIWrapper\n",
        "\n",
        "from langchain.tools import Tool\n",
        "from langchain_community.utilities import GoogleSearchAPIWrapper"
      ],
      "metadata": {
        "id": "bpb1dYzBZsRR"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "api_wrapper = WikipediaAPIWrapper()\n",
        "wikipedia = WikipediaQueryRun(api_wrapper=api_wrapper)"
      ],
      "metadata": {
        "id": "_NAkY8FkMHcx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "wikipedia.run(\"large language model\")"
      ],
      "metadata": {
        "id": "ADu6renzI3bi"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "websearch = GoogleSearchAPIWrapper()\n",
        "\n",
        "def top5_results(query):\n",
        "  return websearch.results(query, 5)\n",
        "\n",
        "google_search = Tool(\n",
        "    name=\"google_search\",\n",
        "    description=\"Search Google for recent results.\",\n",
        "    #func=top5_results,\n",
        "    func=websearch.run,\n",
        ")"
      ],
      "metadata": {
        "id": "QtWQgcDpblGx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "google_search.run(\"large language model\")"
      ],
      "metadata": {
        "id": "IVAbbQ04ZE9M"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "wikipedia.args"
      ],
      "metadata": {
        "id": "Cv2z8MFNJ3sD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# HF libraries\n",
        "from langchain.llms import HuggingFaceHub\n",
        "\n",
        "# Load the model from the Hugging Face Hub\n",
        "model_id = HuggingFaceHub(repo_id=\"mistralai/Mixtral-8x7B-Instruct-v0.1\", model_kwargs={\n",
        "    \"temperature\":0.1,\n",
        "    \"max_new_tokens\":1024,\n",
        "    \"repetition_penalty\":1.2,\n",
        "    \"return_full_text\":False\n",
        "    })"
      ],
      "metadata": {
        "id": "JHO0Hr5phBLH"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain import hub\n",
        "from langchain.agents import AgentExecutor, create_react_agent, load_tools\n",
        "from langchain.tools.render import render_text_description\n",
        "from langchain.tools.retriever import create_retriever_tool\n",
        "from langchain.retrievers import ArxivRetriever\n",
        "from langchain.agents.format_scratchpad import format_log_to_str\n",
        "from langchain.agents.output_parsers import (\n",
        "    ReActJsonSingleInputOutputParser,\n",
        ")\n",
        "from langchain.tools import YouTubeSearchTool\n",
        "\n",
        "from langchain_community.chat_message_histories import ChatMessageHistory\n",
        "from langchain_core.runnables.history import RunnableWithMessageHistory\n",
        "\n",
        "message_history = ChatMessageHistory()\n",
        "\n",
        "arxiv_retriever = ArxivRetriever()\n",
        "\n",
        "arxiv_search = create_retriever_tool(\n",
        "    arxiv_retriever,\n",
        "    \"arxiv_database\",\n",
        "    \"Search arxiv database for scientific research papers and studies\",\n",
        ")\n",
        "\n",
        "youtube_search = YouTubeSearchTool()\n",
        "\n",
        "tools = [arxiv_search, wikipedia, google_search]\n",
        "\n",
        "#prompt = hub.pull(\"hwchase17/react\")\n",
        "prompt = hub.pull(\"hwchase17/react-json\")\n",
        "prompt = prompt.partial(\n",
        "    tools=render_text_description(tools),\n",
        "    tool_names=\", \".join([t.name for t in tools]),\n",
        ")\n",
        "chat_model_with_stop = model_id.bind(stop=[\"\\nObservation\"])\n",
        "agent = (\n",
        "    {\n",
        "        \"input\": lambda x: x[\"input\"],\n",
        "        \"agent_scratchpad\": lambda x: format_log_to_str(x[\"intermediate_steps\"]),\n",
        "    }\n",
        "    | prompt\n",
        "    | chat_model_with_stop\n",
        "#    | model_id\n",
        "    | ReActJsonSingleInputOutputParser()\n",
        ")\n",
        "\n",
        "#agent = create_react_agent(model_id, tools, prompt)\n",
        "agent_executor = AgentExecutor(\n",
        "    agent=agent,\n",
        "    tools=tools,\n",
        "    verbose=True,\n",
        "    max_iterations=10,       # cap number of iterations\n",
        "    #max_execution_time=60,  # timout at 60 sec\n",
        "    return_intermediate_steps=True,\n",
        "    handle_parsing_errors=True,\n",
        "    )\n",
        "\n",
        "def stream_output(query):\n",
        "  for chunk in agent_executor.stream({\"input\": query}):\n",
        "      # Agent Action\n",
        "      if \"actions\" in chunk:\n",
        "          for action in chunk[\"actions\"]:\n",
        "              print(\n",
        "                  f\"Calling Tool ```{action.tool}``` with input ```{action.tool_input}```\"\n",
        "              )\n",
        "      # Observation\n",
        "      elif \"steps\" in chunk:\n",
        "          for step in chunk[\"steps\"]:\n",
        "              print(f\"Got result: ```{step.observation}```\")\n",
        "\n",
        "# Chat memory not working yet\n",
        "agent_with_chat_history = RunnableWithMessageHistory(\n",
        "    agent_executor,\n",
        "    lambda session_id: message_history,\n",
        "    input_message_key=\"input\",\n",
        "    history_messages_key=\"chat_history\",\n",
        ")"
      ],
      "metadata": {
        "id": "D4Gj_dZtgzci"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "stream_output(\"what is corrective retrieval augmeneted generation\")"
      ],
      "metadata": {
        "id": "ItAD-n6BnTc6"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "## Youtube search tool, not used yet\n",
        "import ast\n",
        "def you_four(query):\n",
        "  fquery = query+',4'\n",
        "  videos_str = youtube_search.run(fquery)\n",
        "#  video_list.replace('watch?v=','embed/')\n",
        "#  video_list = [word.replace('watch?v=','embed/') for word in video_list]\n",
        "  video_list = convert_urls(videos_str)\n",
        "\n",
        "  return video_list\n",
        "\n",
        "def convert_urls(urls):\n",
        "  # Convert the string representation of the list into an actual list\n",
        "  urls = ast.literal_eval(urls)\n",
        "  #urls = [ for url in urls]\n",
        "  iframes = []\n",
        "  for url in urls:\n",
        "    embed_url = url.replace('watch?v=','embed/')\n",
        "    iframe = f'<iframe width=\"560\" height=\"315\" src=\"{embed_url}\" frameborder=\"0\" allowfullscreen></iframe>'\n",
        "    iframes.append(iframe)\n",
        "  return iframes"
      ],
      "metadata": {
        "id": "3LzQEqeTzH0L"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "list_d=you_four(\"air taxi\")\n",
        "list_d"
      ],
      "metadata": {
        "id": "T7I6eIh318rU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "agent_with_chat_history.invoke(\n",
        "    {\"input\": \"hi! I'm bob\"},\n",
        "    # This is needed because in most real world scenarios, a session id is needed\n",
        "    # It isn't really used here because we are using a simple in memory ChatMessageHistory\n",
        "    config={\"configurable\": {\"session_id\": \"<foo>\"}},\n",
        ")"
      ],
      "metadata": {
        "id": "7MxiaD6qffZG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "agent_with_chat_history.invoke(\n",
        "    {\"input\": \"what's my name?\"},\n",
        "    # This is needed because in most real world scenarios, a session id is needed\n",
        "    # It isn't really used here because we are using a simple in memory ChatMessageHistory\n",
        "    config={\"configurable\": {\"session_id\": \"<foo>\"}},\n",
        ")"
      ],
      "metadata": {
        "id": "5cjo4j2nfkbQ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "return_txt= agent_executor.invoke(\n",
        "    {\n",
        "         \"input\": \"how could a concept for an airtaxi fleet management look like?\",\n",
        "    }\n",
        ")"
      ],
      "metadata": {
        "id": "-q81PaZijPvO"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "agent_executor.invoke(\n",
        "    {\n",
        "         \"input\": \"What's the latest paper on corrective retrieval augmeneted generation?\"\n",
        "    }\n",
        ")"
      ],
      "metadata": {
        "id": "GCAOXXdPJ_wL"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "import gradio as gr\n",
        "def add_text(history, text):\n",
        "    history = history + [(text, None)]\n",
        "    return history, \"\"\n",
        "\n",
        "def bot(history):\n",
        "    response = infer(history[-1][0], history)\n",
        "    history[-1][1] = response['output']\n",
        "    return history\n",
        "\n",
        "def infer(question, history):\n",
        "    query =  question\n",
        "    result = agent_executor.invoke(\n",
        "        {\n",
        "            \"input\": question,\n",
        "        }\n",
        "    )\n",
        "    return result\n",
        "\n",
        "def you_frame(question):\n",
        "  iframes=you_four(question)\n",
        "  return '\\n'.join(iframes)\n",
        "\n",
        "def vote(data: gr.LikeData):\n",
        "    if data.liked:\n",
        "        print(\"You upvoted this response: \" + data.value)\n",
        "    else:\n",
        "        print(\"You downvoted this response: \" + data.value)\n",
        "\n",
        "css=\"\"\"\n",
        "#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}\n",
        "\"\"\"\n",
        "\n",
        "title = \"\"\"\n",
        "<div style=\"text-align: center;max-width: 700px;\">\n",
        "    <p>Hello Dave, how can I help today?<br />\n",
        "</div>\n",
        "\"\"\"\n",
        "\n",
        "with gr.Blocks(theme=gr.themes.Soft()) as demo:\n",
        "  with gr.Tab(\"Google|Wikipedia|Arxiv\"):\n",
        "    with gr.Column(elem_id=\"col-container\"):\n",
        "        gr.HTML(title)\n",
        "        with gr.Row():\n",
        "            question = gr.Textbox(label=\"Question\", placeholder=\"Type your question and hit Enter \")\n",
        "        chatbot = gr.Chatbot([], elem_id=\"chatbot\")\n",
        "        chatbot.like(vote, None, None)\n",
        "        clear = gr.Button(\"Clear\")\n",
        "    question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(\n",
        "        bot, chatbot, chatbot\n",
        "    )\n",
        "    clear.click(lambda: None, None, chatbot, queue=False)\n",
        "\n",
        "demo.queue()\n",
        "demo.launch(debug=True)"
      ],
      "metadata": {
        "id": "J7xy7c2LcEbe"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}