{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import getpass\n", "import os\n", "\n", "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", "os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "import getpass\n", "import os\n", "\n", "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n", "\n", "from langchain_openai import ChatOpenAI\n", "\n", "llm = ChatOpenAI(model=\"gpt-4o-mini\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "ename": "RateLimitError", "evalue": "Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRateLimitError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[14], line 23\u001b[0m\n\u001b[1;32m 21\u001b[0m text_splitter \u001b[38;5;241m=\u001b[39m RecursiveCharacterTextSplitter(chunk_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1000\u001b[39m, chunk_overlap\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m200\u001b[39m)\n\u001b[1;32m 22\u001b[0m splits \u001b[38;5;241m=\u001b[39m text_splitter\u001b[38;5;241m.\u001b[39msplit_documents(docs)\n\u001b[0;32m---> 23\u001b[0m vectorstore \u001b[38;5;241m=\u001b[39m \u001b[43mChroma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_documents\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdocuments\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msplits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mOpenAIEmbeddings\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# Retrieve and generate using the relevant snippets of the blog.\u001b[39;00m\n\u001b[1;32m 26\u001b[0m retriever \u001b[38;5;241m=\u001b[39m vectorstore\u001b[38;5;241m.\u001b[39mas_retriever()\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/langchain_chroma/vectorstores.py:1128\u001b[0m, in \u001b[0;36mChroma.from_documents\u001b[0;34m(cls, documents, embedding, ids, collection_name, persist_directory, client_settings, client, collection_metadata, **kwargs)\u001b[0m\n\u001b[1;32m 1126\u001b[0m texts \u001b[38;5;241m=\u001b[39m [doc\u001b[38;5;241m.\u001b[39mpage_content \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[1;32m 1127\u001b[0m metadatas \u001b[38;5;241m=\u001b[39m [doc\u001b[38;5;241m.\u001b[39mmetadata \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[0;32m-> 1128\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_texts\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1129\u001b[0m \u001b[43m \u001b[49m\u001b[43mtexts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1130\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1131\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetadatas\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadatas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1132\u001b[0m \u001b[43m \u001b[49m\u001b[43mids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1133\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollection_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1134\u001b[0m \u001b[43m \u001b[49m\u001b[43mpersist_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpersist_directory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1135\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient_settings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient_settings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1136\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1137\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollection_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1138\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1139\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/langchain_chroma/vectorstores.py:1089\u001b[0m, in \u001b[0;36mChroma.from_texts\u001b[0;34m(cls, texts, embedding, metadatas, ids, collection_name, persist_directory, client_settings, client, collection_metadata, **kwargs)\u001b[0m\n\u001b[1;32m 1083\u001b[0m chroma_collection\u001b[38;5;241m.\u001b[39madd_texts(\n\u001b[1;32m 1084\u001b[0m texts\u001b[38;5;241m=\u001b[39mbatch[\u001b[38;5;241m3\u001b[39m] \u001b[38;5;28;01mif\u001b[39;00m batch[\u001b[38;5;241m3\u001b[39m] \u001b[38;5;28;01melse\u001b[39;00m [],\n\u001b[1;32m 1085\u001b[0m metadatas\u001b[38;5;241m=\u001b[39mbatch[\u001b[38;5;241m2\u001b[39m] \u001b[38;5;28;01mif\u001b[39;00m batch[\u001b[38;5;241m2\u001b[39m] \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 1086\u001b[0m ids\u001b[38;5;241m=\u001b[39mbatch[\u001b[38;5;241m0\u001b[39m],\n\u001b[1;32m 1087\u001b[0m )\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1089\u001b[0m \u001b[43mchroma_collection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_texts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtexts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadatas\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadatas\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mids\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m chroma_collection\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/langchain_chroma/vectorstores.py:508\u001b[0m, in \u001b[0;36mChroma.add_texts\u001b[0;34m(self, texts, metadatas, ids, **kwargs)\u001b[0m\n\u001b[1;32m 506\u001b[0m texts \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(texts)\n\u001b[1;32m 507\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_embedding_function \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 508\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_embedding_function\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membed_documents\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtexts\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 509\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m metadatas:\n\u001b[1;32m 510\u001b[0m \u001b[38;5;66;03m# fill metadatas with empty dicts if somebody\u001b[39;00m\n\u001b[1;32m 511\u001b[0m \u001b[38;5;66;03m# did not specify metadata for all texts\u001b[39;00m\n\u001b[1;32m 512\u001b[0m length_diff \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(texts) \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mlen\u001b[39m(metadatas)\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/langchain_openai/embeddings/base.py:588\u001b[0m, in \u001b[0;36mOpenAIEmbeddings.embed_documents\u001b[0;34m(self, texts, chunk_size)\u001b[0m\n\u001b[1;32m 585\u001b[0m \u001b[38;5;66;03m# NOTE: to keep things simple, we assume the list may contain texts longer\u001b[39;00m\n\u001b[1;32m 586\u001b[0m \u001b[38;5;66;03m# than the maximum context and use length-safe embedding function.\u001b[39;00m\n\u001b[1;32m 587\u001b[0m engine \u001b[38;5;241m=\u001b[39m cast(\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdeployment)\n\u001b[0;32m--> 588\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_len_safe_embeddings\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/langchain_openai/embeddings/base.py:483\u001b[0m, in \u001b[0;36mOpenAIEmbeddings._get_len_safe_embeddings\u001b[0;34m(self, texts, engine, chunk_size)\u001b[0m\n\u001b[1;32m 481\u001b[0m batched_embeddings: List[List[\u001b[38;5;28mfloat\u001b[39m]] \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 482\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m _iter:\n\u001b[0;32m--> 483\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 484\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtokens\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m_chunk_size\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_invocation_params\u001b[49m\n\u001b[1;32m 485\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, \u001b[38;5;28mdict\u001b[39m):\n\u001b[1;32m 487\u001b[0m response \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mmodel_dump()\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/openai/resources/embeddings.py:124\u001b[0m, in \u001b[0;36mEmbeddings.create\u001b[0;34m(self, input, model, dimensions, encoding_format, user, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 118\u001b[0m embedding\u001b[38;5;241m.\u001b[39membedding \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mfrombuffer( \u001b[38;5;66;03m# type: ignore[no-untyped-call]\u001b[39;00m\n\u001b[1;32m 119\u001b[0m base64\u001b[38;5;241m.\u001b[39mb64decode(data), dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfloat32\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 120\u001b[0m )\u001b[38;5;241m.\u001b[39mtolist()\n\u001b[1;32m 122\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n\u001b[0;32m--> 124\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 125\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/embeddings\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding_create_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mEmbeddingCreateParams\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m \u001b[49m\u001b[43mpost_parser\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparser\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 134\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCreateEmbeddingResponse\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 135\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/openai/_base_client.py:1278\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1264\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1265\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1266\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1273\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1274\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1275\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1276\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1277\u001b[0m )\n\u001b[0;32m-> 1278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/openai/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/openai/_base_client.py:1044\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1043\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1044\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1051\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1055\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/openai/_base_client.py:1093\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1091\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1093\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1099\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/openai/_base_client.py:1044\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1043\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1044\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1051\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1055\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/openai/_base_client.py:1093\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1091\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1093\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1099\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/openai/_base_client.py:1059\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1056\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1058\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1059\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1061\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1062\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1063\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1067\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1068\u001b[0m )\n", "\u001b[0;31mRateLimitError\u001b[0m: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}" ] } ], "source": [ "import bs4\n", "from langchain import hub\n", "from langchain_chroma import Chroma\n", "from langchain_community.document_loaders import WebBaseLoader\n", "from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.runnables import RunnablePassthrough\n", "from langchain_openai import OpenAIEmbeddings\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", "\n", "# Load, chunk and index the contents of the blog.\n", "loader = WebBaseLoader(\n", " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", " bs_kwargs=dict(\n", " parse_only=bs4.SoupStrainer(\n", " class_=(\"post-content\", \"post-title\", \"post-header\")\n", " )\n", " ),\n", ")\n", "docs = loader.load()\n", "\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", "splits = text_splitter.split_documents(docs)\n", "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", "\n", "# Retrieve and generate using the relevant snippets of the blog.\n", "retriever = vectorstore.as_retriever()\n", "prompt = hub.pull(\"rlm/rag-prompt\")\n", "\n", "\n", "def format_docs(docs):\n", " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", "\n", "\n", "rag_chain = (\n", " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", " | prompt\n", " | llm\n", " | StrOutputParser()\n", ")\n", "\n", "rag_chain.invoke(\"What is Task Decomposition?\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "from langchain_community.document_loaders import WebBaseLoader\n", "from bs4 import BeautifulSoup\n", "from langchain_chroma import Chroma\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", "from langchain_openai import OpenAIEmbeddings\n", "\n", "\n", "loader = WebBaseLoader(\n", " web_paths=[\n", " \"https://www.economist.com/united-states/2024/11/15/what-would-robert-kennedy-junior-mean-for-american-health\"\n", " ],\n", " bs_kwargs=dict(parse_only=bs4.SoupStrainer([\"article\", \"title\", \"content\"]))\n", ")\n", "docs = loader.load()\n", "\n", "\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", "splits = text_splitter.split_documents(docs)\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(metadata={'source': 'https://www.economist.com/united-states/2024/11/15/what-would-robert-kennedy-junior-mean-for-american-health', 'title': 'What would Robert Kennedy junior mean for American health?'}, page_content='What would Robert Kennedy junior mean for American health?The EconomistThe EconomistSearchUnited States | MAGA meets MAHAWhat would Robert Kennedy junior mean for American health?Donald Trump’s nomination for health secretary is sceptical about vaccines and mainstream medicinePhotograph: Getty Images Nov 15th 2024|WASHINGTON, DCShareAS IN MOST marriages of convenience, Donald Trump and Robert F. Kennedy junior make unusual bedfellows. One enjoys junk food, hates exercise and loves oil. The other talks of clean food, getting America moving again and wants to eliminate oils of all sorts (from seed oil to Mr Trump’s beloved “liquid gold”). One has called the covid-19 vaccine a “miracle”, the other is a long-term vaccine sceptic. Yet on November 14th Mr Trump announced that Mr Kennedy was his pick for secretary of health and human services (HHS).Explore moreDonald TrumpUnited StatesShareReuse this contentDiscover moreBack to the 1850sImmigrant voters may have won America’s presidential'),\n", " Document(metadata={'source': 'https://www.economist.com/united-states/2024/11/15/what-would-robert-kennedy-junior-mean-for-american-health', 'title': 'What would Robert Kennedy junior mean for American health?'}, page_content='pick for secretary of health and human services (HHS).Explore moreDonald TrumpUnited StatesShareReuse this contentDiscover moreBack to the 1850sImmigrant voters may have won America’s presidential election for the nativist candidateClimate change and the next administrationThere are obstacles to Donald Trump’s attempts to reverse progressThe promise Donald Trump is sure to keepHe wants more freedom to fire civil servants, but he should weigh the long-term consequencesSenate Republicans flex their independenceTrump doesn’t weigh in on a leadership contest, but real conflicts will soon comeRepublicans finally win the coveted trifectaYet the party’s grip on power could be less reliable than it appearsThe man picked as defence secretary wants to purge the PentagonPete Hegseth, a Fox News host, takes aim at “woke shit”LinkedInFacebookXInstagramThreadsTikTokYouTubeRSS')]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "splits" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 2 }