{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "aaa7e27e-7632-4024-9764-a4404293fd05", "metadata": {}, "outputs": [], "source": [ "import openai\n", "from llama_index.core import SimpleDirectoryReader\n", "from llama_index.core import Document\n", "from llama_index.core import VectorStoreIndex\n", "from llama_index.core import ServiceContext\n", "from llama_index.llms.openai import OpenAI\n", "\n", "# Embedding class from the llama_index.embeddings.huggingface integration package (the same\n", "# package the Chroma section imports from) instead of the llama_index.legacy copy, so it\n", "# belongs to the same package family as the llama_index.core classes imported above.\n", "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n", "from llama_index.core import StorageContext, load_index_from_storage\n", "\n", "import time" ] },
{ "cell_type": "code", "execution_count": null, "id": "8a2abdeb-9ff3-4793-8a14-92d8d06391f9", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "id": "ca5606cf-16ec-4ae1-a057-25fab105d545", "metadata": {}, "source": [ "### Run Indexing in memory without using vectorstore" ] },
{ "cell_type": "code", "execution_count": null, "id": "a2d79bc4-1c0f-4354-ae6c-fe3697a28c9d", "metadata": {}, "outputs": [], "source": [ "# Start of the wall-clock measurement that the indexing-time cell reports\n", "start_time = time.time()" ] },
{ "cell_type": "code", "execution_count": null, "id": "219bf47e-2df4-46e2-b6c4-b0406820fe8c", "metadata": {}, "outputs": [], "source": [ "# Load the PDF, then join the text of every loaded document (blank line between them)\n", "# into one Document so the file is indexed as a single unit.\n", "documents = SimpleDirectoryReader(input_files=[\"../raw_documents/HI_Knowledge_Base.pdf\"]).load_data()\n", "document = Document(text=\"\\n\\n\".join([doc.text for doc in documents]))" ] },
{ "cell_type": "code", "execution_count": null, "id": "6e0e2c29-98c5-48c8-9f1c-b41ca0a1e904", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "id": "ad4edb79-f29b-4556-9e69-afa8917568c7", "metadata": {}, "outputs": [], "source": [ "# OpenAI chat model; the ServiceContext built in this section is created with llm=None,\n", "# so this object is not attached to the index.\n", "llm = OpenAI(model=\"gpt-3.5-turbo-1106\", temperature=0.1)" ] },
{ "cell_type": "code", "execution_count": null, "id": "f4911316-bc1b-4e65-a66e-c83e70c6fa00", "metadata": {}, "outputs": [], "source": [ "# Hugging Face embedding model passed to the ServiceContext for this section\n", "embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")" ] },
{ "cell_type": "code", "execution_count": null, 
"id": "6a441a1f-91d8-49ec-999d-36adbccbfa90", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "942f4406-4b24-4c4e-a803-e0a9a5f31f6c", "metadata": {}, "outputs": [], "source": [ "# service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\n", "service_context = ServiceContext.from_defaults(llm=None, embed_model=embed_model)\n", "index = VectorStoreIndex.from_documents([document], service_context=service_context)" ] }, { "cell_type": "code", "execution_count": null, "id": "630afd70-7aad-4049-950e-096976e4f7fa", "metadata": {}, "outputs": [], "source": [ "indexing_cost = time.time() - start_time\n", "indexing_cost = indexing_cost / 60\n", "print(f\"indexing time: {indexing_cost:.1f} mins\")" ] }, { "cell_type": "code", "execution_count": null, "id": "24691a2c-00d9-4121-81f8-8aacbe764ae8", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "36f81a58-4249-43bd-97e2-781b1d08b6d9", "metadata": {}, "outputs": [], "source": [ "index.storage_context.persist(persist_dir=\"../models/llama_index_json/\")" ] }, { "cell_type": "code", "execution_count": null, "id": "dca06682-8fa1-4cf2-81a9-4c7d223d2032", "metadata": {}, "outputs": [], "source": [ "query_engine = index.as_query_engine()" ] }, { "cell_type": "code", "execution_count": null, "id": "f2553fd3-dc88-4a75-a85c-ab095ae3fba8", "metadata": {}, "outputs": [], "source": [ "response = query_engine.query(\"What is medishield\")" ] }, { "cell_type": "code", "execution_count": null, "id": "c8c6f966-6196-4af2-aaac-042e327bb046", "metadata": {}, "outputs": [], "source": [ "print(str(response))" ] }, { "cell_type": "code", "execution_count": null, "id": "b9365c5a-fcbb-4581-997b-d23723175129", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "94977d29-431f-4704-a93f-3b96a1e70cd0", "metadata": {}, "source": [ "### Load index from persisted file without using vectorstore" ] }, { 
"cell_type": "code", "execution_count": null, "id": "c5348da7-59e8-45d1-afee-d50ff58549cb", "metadata": {}, "outputs": [], "source": [ "start_time = time.time()" ] }, { "cell_type": "code", "execution_count": null, "id": "0d51d2d5-9523-46dd-9bc4-b013bba42735", "metadata": {}, "outputs": [], "source": [ "embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "c2a72bfe-8e3e-4b26-8cbe-4e55b1fbab34", "metadata": {}, "outputs": [], "source": [ "service_context = ServiceContext.from_defaults(llm=None, embed_model=embed_model)" ] }, { "cell_type": "code", "execution_count": null, "id": "7a460ce0-a3a2-47ec-95bb-472c61d9f23a", "metadata": {}, "outputs": [], "source": [ "storage_context = StorageContext.from_defaults(persist_dir=\"../models/llama_index_json/\")" ] }, { "cell_type": "code", "execution_count": null, "id": "1331b3e5-7b3a-4f0f-80f0-f8a6810431af", "metadata": {}, "outputs": [], "source": [ "index = load_index_from_storage(storage_context=storage_context, service_context=service_context)" ] }, { "cell_type": "code", "execution_count": null, "id": "1e9b0825-eff8-42cd-b014-0f612b268d42", "metadata": {}, "outputs": [], "source": [ "load_indexing_cost = time.time() - start_time\n", "load_indexing_cost = load_indexing_cost / 60\n", "print(f\"Load indexing time: {load_indexing_cost:.1f} mins\")" ] }, { "cell_type": "code", "execution_count": null, "id": "7df3557b-c79f-45ad-9216-8d1502a5bc8e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "f77c3494-65a6-44fc-ab6e-de64153300f0", "metadata": {}, "source": [ "### Instantiate query engine" ] }, { "cell_type": "code", "execution_count": null, "id": "891f6fe1-ef71-44f8-b6a8-03d8abdcadae", "metadata": {}, "outputs": [], "source": [ "query_engine = index.as_query_engine()" ] }, { "cell_type": "code", "execution_count": null, "id": "a6ad3fbb-8bb0-4115-88e5-4b272b1ba422", "metadata": { "scrolled": true }, "outputs": [], 
"source": [ "response = query_engine.query(\"What is medishield\")\n", "print(str(response))" ] }, { "cell_type": "code", "execution_count": null, "id": "88fc7e00-64af-4910-a1d0-34a36e110b05", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "34a4ea9f-73d0-4733-847e-89fb4a051294", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "4730d2e7-dfa9-452d-ae20-95a24b6fd8bd", "metadata": {}, "source": [ "### Create vectorstore based on default storage" ] }, { "cell_type": "code", "execution_count": null, "id": "67e3e5ea-8b6d-4493-91ca-f0ced02971e1", "metadata": {}, "outputs": [], "source": [ "from llama_index.core import (\n", " SimpleDirectoryReader,\n", " VectorStoreIndex,\n", " StorageContext,\n", " load_index_from_storage,\n", ")\n", "\n", "from llama_index.core.tools import QueryEngineTool, ToolMetadata" ] }, { "cell_type": "code", "execution_count": null, "id": "d8d4ee04-37be-4014-8403-27ef467462b1", "metadata": {}, "outputs": [], "source": [ "documents = SimpleDirectoryReader(input_files=[\n", " \"../raw_documents/HI Chapter Summary Version 1.3.pdf\",\n", " \"../raw_documents/qna.txt\"\n", " ]).load_data()" ] }, { "cell_type": "code", "execution_count": null, "id": "d5f6a8b0-0798-42ab-9741-caf52a0bae0c", "metadata": {}, "outputs": [], "source": [ "hi_index = VectorStoreIndex.from_documents(documents)" ] }, { "cell_type": "code", "execution_count": null, "id": "3e9f4da3-32af-4e1a-9b28-bfe2c51cc7e1", "metadata": {}, "outputs": [], "source": [ "hi_index.storage_context.persist(persist_dir=\"../models/default_storage\")" ] }, { "cell_type": "code", "execution_count": null, "id": "6f032e21-7f98-4752-ad55-3607fd40b7d5", "metadata": {}, "outputs": [], "source": [ "hi_engine = hi_index.as_query_engine(similarity_top_k=3)" ] }, { "cell_type": "code", "execution_count": null, "id": "3c599039-8cdf-449c-b3d8-f0d41bffdffc", "metadata": {}, "outputs": [], "source": [ "response = 
hi_engine.query(\"what is the healthcare philosophy in singapore\")\n", "response" ] },
{ "cell_type": "code", "execution_count": null, "id": "0757962c-235b-49b7-a92d-49c474b79731", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "id": "b77caa5b-16eb-48ec-8f74-0fc6ca03efbd", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "id": "f0c97e94-5dc5-443e-94a2-ce28aad99a3c", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "id": "c19cc1b2-4669-4019-9fea-8e8aa3c74c6f", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "id": "8acae3ed-2953-45a3-aba9-0327b6ae3679", "metadata": {}, "source": [ "### ChromaDB method - create vectorstore based on Chroma" ] },
{ "cell_type": "code", "execution_count": null, "id": "7de9c591-5a77-4bbe-80f1-4897e15f0b97", "metadata": {}, "outputs": [], "source": [ "# Standard library\n", "import time\n", "\n", "# Third-party\n", "import chromadb\n", "import nest_asyncio\n", "from llama_index.core import (\n", "    Document,\n", "    ServiceContext,\n", "    Settings,\n", "    SimpleDirectoryReader,\n", "    StorageContext,\n", "    VectorStoreIndex,\n", ")\n", "from llama_index.embeddings.huggingface.base import HuggingFaceEmbedding\n", "from llama_index.vector_stores.chroma.base import ChromaVectorStore\n", "\n", "# Patch asyncio via nest_asyncio before any of the cells below run\n", "nest_asyncio.apply()" ] },
{ "cell_type": "code", "execution_count": null, "id": "3e65dff6-77b6-4be8-8857-5cecf3a035bb", "metadata": {}, "outputs": [], "source": [ "# All three knowledge-base files are loaded for the Chroma-backed index\n", "chroma_source_files = [\n", "    \"../raw_documents/HI_Knowledge_Base.pdf\",\n", "    \"../raw_documents/HI Chapter Summary Version 1.3.pdf\",\n", "    \"../raw_documents/qna.txt\",\n", "]\n", "documents = SimpleDirectoryReader(input_files=chroma_source_files).load_data()\n", "# Text of every loaded document, separated by blank lines, as one Document\n", "document = Document(text=\"\\n\\n\".join(doc.text for doc in documents))" ] },
{ "cell_type": "code", "execution_count": null, 
"id": "bd86b3f5-1dfc-4257-bd9c-86d34f02398d", "metadata": {}, "outputs": [], "source": [ "# initialize client, setting path to save data\n", "db = chromadb.PersistentClient(path=\"../models/chroma_db\")" ] }, { "cell_type": "code", "execution_count": null, "id": "f568ce7b-bcbf-455c-acf1-6c2cae129fed", "metadata": {}, "outputs": [], "source": [ "# create collection\n", "chroma_collection = db.get_or_create_collection(\"quickstart\")" ] }, { "cell_type": "code", "execution_count": null, "id": "ed0b018e-1982-46b2-b1b4-04f5c0ce8672", "metadata": {}, "outputs": [], "source": [ "# assign chroma as the vector_store to the context\n", "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)" ] }, { "cell_type": "code", "execution_count": null, "id": "eb5edab2-30db-4bf7-96b5-4005d3161988", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "b4adc76c-b18d-4a3f-8563-127074491ba9", "metadata": {}, "outputs": [], "source": [ "# embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n", "# embed_model = HuggingFaceEmbedding(model_name=\"local:../models/fine-tuned-embeddings\")" ] }, { "cell_type": "code", "execution_count": null, "id": "0946b6ce-96ab-44de-ad75-e424a8429f67", "metadata": {}, "outputs": [], "source": [ "Settings.llm = None\n", "Settings.chunk_size = 1024\n", "Settings.embed_model = \"local:../models/fine-tuned-embeddings\"" ] }, { "cell_type": "code", "execution_count": null, "id": "b8c73a2c-1129-406a-8046-085afcaf9cbb", "metadata": {}, "outputs": [], "source": [ "nodes = Settings.node_parser.get_nodes_from_documents(documents)" ] }, { "cell_type": "code", "execution_count": null, "id": "adfe688f-95c0-477c-a9de-e9e77541a1d7", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "dab4c6f3-ef67-4d90-b3d5-e290c5d1b6f4", "metadata": {}, "outputs": [], "source": [ "storage_context = StorageContext.from_defaults(vector_store=vector_store)" ] 
}, { "cell_type": "code", "execution_count": null, "id": "6a764113-ad7e-4674-aa57-ebbf405902a8", "metadata": {}, "outputs": [], "source": [ "# Register the parsed nodes in the storage context's docstore\n", "docstore = storage_context.docstore\n", "docstore.add_documents(nodes)" ] },
{ "cell_type": "code", "execution_count": null, "id": "38e7c88d-6c45-4275-8293-d09b4b85a7cf", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "id": "e492ed4a-23a3-47d6-8b50-51fb48b3aa05", "metadata": {}, "outputs": [], "source": [ "# Timer for building the index from the pre-parsed nodes\n", "start_time = time.time()" ] },
{ "cell_type": "code", "execution_count": null, "id": "cbd11b89-9b83-4f08-bb30-160f750f2ffb", "metadata": {}, "outputs": [], "source": [ "# Build the index from the nodes on top of the Chroma-backed storage context\n", "vector_index = VectorStoreIndex(\n", "    nodes,\n", "    storage_context=storage_context,\n", ")" ] },
{ "cell_type": "code", "execution_count": null, "id": "082a0d7e-b025-4db1-be2a-7a0b7bc453b9", "metadata": {}, "outputs": [], "source": [ "# Query engine over the node-built index\n", "vector_query_engine = vector_index.as_query_engine()" ] },
{ "cell_type": "code", "execution_count": null, "id": "d3bd848d-9985-4a3d-bdc4-ec340cc69ef3", "metadata": {}, "outputs": [], "source": [ "# Elapsed time since start_time, converted from seconds to minutes\n", "indexing_cost = (time.time() - start_time) / 60\n", "print(f\"Indexing time: {indexing_cost:.1f} mins\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "3290e870-41d7-49c4-9c4f-cb16bd1f469e", "metadata": {}, "outputs": [], "source": [ "# The bare last expression shows the response as the cell output\n", "response = vector_query_engine.query(\n", "    \"what is the healthcare philosophy in singapore\"\n", ")\n", "response" ] },
{ "cell_type": "code", "execution_count": null, "id": "131d907a-0677-4ad8-b3f7-6fc9b9c5d0a5", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "id": "f95e42ff-374b-4c57-8333-137a6205a6ad", "metadata": {}, "outputs": [], "source": [ "# Restart the timer for the from_documents build that follows\n", "start_time = time.time()" ] },
{ "cell_type": "code", "execution_count": null, "id": "82fc724e-4b03-433d-ada4-d451e13e25e9", "metadata": {}, "outputs": [], "source": [ "# create your index\n", "service_context = ServiceContext.from_defaults(llm=None, 
embed_model=\"local:../models/fine-tuned-embeddings\")\n", "# Write this second build to its own collection: the \"quickstart\" collection already holds\n", "# the vectors inserted by VectorStoreIndex(nodes, ...) earlier in this section, and adding\n", "# the same documents to it again would store every chunk twice.\n", "from_documents_collection = db.get_or_create_collection(\"quickstart_from_documents\")\n", "storage_context = StorageContext.from_defaults(\n", "    vector_store=ChromaVectorStore(chroma_collection=from_documents_collection)\n", ")\n", "index = VectorStoreIndex.from_documents(\n", "    documents, service_context=service_context, storage_context=storage_context\n", ")" ] },
{ "cell_type": "code", "execution_count": null, "id": "1ff50855-e043-4736-87c7-0ef8c11bbb26", "metadata": {}, "outputs": [], "source": [ "# Elapsed time since start_time, converted from seconds to minutes\n", "indexing_cost = (time.time() - start_time) / 60\n", "print(f\"Indexing time: {indexing_cost:.1f} mins\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "ccbfa64b-6e09-40c4-b6bf-18055eaa6735", "metadata": {}, "outputs": [], "source": [ "# Query engine over the from_documents index\n", "query_engine = index.as_query_engine()" ] },
{ "cell_type": "code", "execution_count": null, "id": "3f16351f-7c28-4b8f-9050-3c90a40998c5", "metadata": {}, "outputs": [], "source": [ "# Retriever over the same index, used to inspect the raw retrieved nodes\n", "retriever = index.as_retriever()" ] },
{ "cell_type": "code", "execution_count": null, "id": "3d07f753-1643-4c18-b368-6c55f4a7968a", "metadata": {}, "outputs": [], "source": [ "# Retrieval results for the test question\n", "r_list = retriever.retrieve(\"what is the healthcare philosophy in singapore\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "74b4367a-185f-42cf-9951-48325378adf0", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Show the first retrieved result as a dict\n", "r_list[0].to_dict()" ] },
{ "cell_type": "code", "execution_count": null, "id": "0d058fa9-2608-4508-9a6c-dd8ff9387987", "metadata": {}, "outputs": [], "source": [ "# Same question through the query engine\n", "response = query_engine.query(\"what is the healthcare philosophy in singapore\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "ec91bf3e-ce05-4183-9270-e53c1a21ccb4", "metadata": {}, "outputs": [], "source": [ "print(response)" ] },
{ "cell_type": "code", "execution_count": null, "id": "08fb2be5-3a44-4bb8-a9fc-61d7f03b7a35", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "id": "a7fc01f6-4738-415b-a96b-afd6cf8d789a", "metadata": {}, "source": [ "### 
ChromaDB method - load vectorstore based on Chroma" ] },
{ "cell_type": "code", "execution_count": null, "id": "c1a42c35-5f57-423c-8fb7-7d18b3b466b5", "metadata": {}, "outputs": [], "source": [ "import chromadb\n", "# Import paths follow the llama_index.core / integration-package layout that the earlier\n", "# sections of this notebook already use, instead of the old top-level llama_index names.\n", "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n", "from llama_index.vector_stores.chroma import ChromaVectorStore\n", "from llama_index.core import StorageContext\n", "from llama_index.core import ServiceContext\n", "from llama_index.core import Document\n", "\n", "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n", "\n", "import time" ] },
{ "cell_type": "code", "execution_count": null, "id": "11ff0889-ef46-4447-ae2e-6fcaaf0733ec", "metadata": {}, "outputs": [], "source": [ "# Query with the same embedding model the \"quickstart\" collection was built with in the\n", "# create section; vectors from a different model are not comparable with the stored ones.\n", "embed_model = \"local:../models/fine-tuned-embeddings\"" ] },
{ "cell_type": "code", "execution_count": null, "id": "6c98a573-b401-4191-99c0-1216833bb566", "metadata": {}, "outputs": [], "source": [ "from llama_index.llms.openai import OpenAI\n", "from llama_index.core.memory import ChatMemoryBuffer\n", "# OpenAI chat model with temperature 0.0; the ServiceContext in this section is built\n", "# with llm=None, so this object is not attached to the index here.\n", "llm = OpenAI(model=\"gpt-3.5-turbo-1106\", temperature=0.0)" ] },
{ "cell_type": "code", "execution_count": null, "id": "ffdbf912-7eb2-429a-a98e-5e3a9e8fe8bd", "metadata": {}, "outputs": [], "source": [ "# Embedding model only, no LLM\n", "service_context = ServiceContext.from_defaults(llm=None, embed_model=embed_model)" ] },
{ "cell_type": "code", "execution_count": null, "id": "b7b302bf-111d-46a0-95e0-a148cb327ad2", "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "id": "2159a2b6-494b-41b9-ac54-dd342bfb74ba", "metadata": {}, "outputs": [], "source": [ "# Re-open the persistent Chroma database written by the create section\n", "db = chromadb.PersistentClient(path=\"../models/chroma_db\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "1b385644-b46e-4d13-88fa-9f4af39db405", "metadata": {}, "outputs": [], "source": [ "# Same collection name as used when the vectors were written\n", "chroma_collection = db.get_or_create_collection(\"quickstart\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "93cb53d1-6b8c-4b2d-a839-53501c0d54b2", "metadata": {}, 
"outputs": [], "source": [ "# assign chroma as the vector_store to the context\n", "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n", "storage_context = StorageContext.from_defaults(vector_store=vector_store)" ] }, { "cell_type": "code", "execution_count": null, "id": "c40d59e1-6d42-41f0-8c9b-70aa026093ae", "metadata": {}, "outputs": [], "source": [ "# create your index\n", "index = VectorStoreIndex.from_vector_store(\n", " vector_store=vector_store, service_context=service_context, storage_context=storage_context\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "73ba6d06-ba69-4b5e-962a-9cf7d2dc4d94", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "ab778a5d-d438-4f39-88f5-c67a1f1d575e", "metadata": {}, "outputs": [], "source": [ "system_content = (\"You are a helpful study assistant. \"\n", " \"You do not respond as 'User' or pretend to be 'User'. \"\n", " \"You only respond once as 'Assistant'.\"\n", ")\n", "memory = ChatMemoryBuffer.from_defaults(token_limit=15000)\n", "chat_engine = index.as_chat_engine(\n", " chat_mode=\"context\",\n", " memory=memory,\n", " system_prompt=system_content\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "8d6de457-43b5-4ea7-b5e3-150abe918671", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "78a5c9f6-a63b-40d6-a43f-0cf4bb7b15a6", "metadata": {}, "outputs": [], "source": [ "# create a query engine\n", "query_engine = index.as_query_engine()" ] }, { "cell_type": "code", "execution_count": null, "id": "58315ac6-c0bc-424d-b4b0-90123ebc57df", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "7bb7c21a-7461-40c1-87a7-4a1f92f70153", "metadata": { "scrolled": true }, "outputs": [], "source": [ "response = query_engine.query(\"What is llama2?\")\n", "print(response)" ] }, { "cell_type": "code", "execution_count": null, "id": 
"874a39ce-e682-42fa-8085-646bacea6cdb", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "301e8270-783d-4942-a05f-9683ca96fbda", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "506672cc-f447-414d-9c57-cd62a964dea8", "metadata": {}, "source": [ "### ChromaDB method - load vectorstore with LLM" ] }, { "cell_type": "code", "execution_count": null, "id": "d9c4a50e-915c-492d-be69-e4ebfd16744a", "metadata": {}, "outputs": [], "source": [ "import chromadb\n", "from llama_index import VectorStoreIndex, SimpleDirectoryReader\n", "from llama_index.vector_stores import ChromaVectorStore\n", "from llama_index.storage.storage_context import StorageContext\n", "from llama_index import ServiceContext\n", "from llama_index import Document\n", "\n", "from llama_index.embeddings import HuggingFaceEmbedding\n", "\n", "import time" ] }, { "cell_type": "code", "execution_count": null, "id": "97680b61-d87a-426d-9177-3670688e8e0c", "metadata": {}, "outputs": [], "source": [ "embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")" ] }, { "cell_type": "code", "execution_count": null, "id": "808fa41d-2b3f-40ab-8cd3-01565b6d6e35", "metadata": {}, "outputs": [], "source": [ "from llama_index.llms import OpenAI\n", "from llama_index.memory import ChatMemoryBuffer\n", "llm = OpenAI(model=\"gpt-3.5-turbo-1106\", temperature=0.0)" ] }, { "cell_type": "code", "execution_count": null, "id": "497b02bd-3ec7-4a4e-8af9-6417437a4bce", "metadata": {}, "outputs": [], "source": [ "service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)" ] }, { "cell_type": "code", "execution_count": null, "id": "51d64b76-628e-418c-b394-807ea9cafd6c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "c0b28d70-c43d-4542-9e1b-4ce29a60f9d3", "metadata": {}, "outputs": [], "source": [ "db = chromadb.PersistentClient(path=\"../models/chroma_db\")" ] 
}, { "cell_type": "code", "execution_count": null, "id": "6f1d4e93-0d74-456a-9c1d-938405a8ec9a", "metadata": {}, "outputs": [], "source": [ "# Fetch the collection by name, creating it if it is missing\n", "chroma_collection = db.get_or_create_collection(\"quickstart\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "da0dd3b7-d798-4c0f-b735-cf1e67094c46", "metadata": {}, "outputs": [], "source": [ "# Wrap the collection as a vector store and put it behind a storage context\n", "vector_store = ChromaVectorStore(\n", "    chroma_collection=chroma_collection,\n", ")\n", "storage_context = StorageContext.from_defaults(\n", "    vector_store=vector_store,\n", ")" ] },
{ "cell_type": "code", "execution_count": null, "id": "0d62e372-8a33-4609-9ac4-fee3cbc4e8a9", "metadata": {}, "outputs": [], "source": [ "# Index object built on top of the vectors already stored in Chroma\n", "index = VectorStoreIndex.from_vector_store(\n", "    vector_store=vector_store,\n", "    service_context=service_context,\n", "    storage_context=storage_context,\n", ")" ] },
{ "cell_type": "code", "execution_count": null, "id": "26dedd3b-44f3-4a67-865a-693cd6d0a9ea", "metadata": {}, "outputs": [], "source": [ "system_content = (\"You are a helpful study assistant. \"\n", " \"You do not respond as 'User' or pretend to be 'User'. 
\"\n", " \"You only respond once as 'Assistant'.\"\n", ")\n", "memory = ChatMemoryBuffer.from_defaults(token_limit=15000)\n", "chat_engine = index.as_chat_engine(\n", " chat_mode=\"context\",\n", " memory=memory,\n", " system_prompt=system_content\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "9e3da625-283a-4d57-a449-d5aa17d0c188", "metadata": {}, "outputs": [], "source": [ "response = chat_engine.stream_chat(\"are you there?\")" ] }, { "cell_type": "code", "execution_count": null, "id": "62ed7a14-261f-4c68-8578-5dfb74bcfc58", "metadata": {}, "outputs": [], "source": [ "for r in response.response_gen:\n", " print(r, end=\"\")" ] }, { "cell_type": "code", "execution_count": null, "id": "1d4ba65c-3135-4b96-a342-c5546949cb72", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "9ca2555f-6975-4bc1-b804-c0c9beb2a515", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" } }, "nbformat": 4, "nbformat_minor": 5 }