"""Streamlit demo: look up the passage of a local text file closest to a query.

The script reads ``state_of_the_union.txt``, splits it into chunks, embeds the
chunks with the ``hkunlp/instructor-base`` model, stores them in a Chroma
vector store, and shows the first retrieved chunk for the text typed into the
page's text area.
"""
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import streamlit as st

SOURCE_PATH = "state_of_the_union.txt"


@st.cache_resource
def _build_retriever(path):
    """Build the retriever over the text file at *path*.

    Streamlit re-executes the whole script on every interaction; caching this
    function keeps the embedding model load and the index build from being
    repeated on each rerun.

    Args:
        path: Path of the text file to index.

    Returns:
        The retriever produced by ``Chroma.as_retriever()``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Context manager so the file handle is closed as soon as it is read.
    with open(path, "r") as source_file:
        full_text = source_file.read()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_text(full_text)

    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base")
    db = Chroma.from_texts(texts, embeddings)
    return db.as_retriever()


text = st.text_area('enter some text to start')
retriever = _build_retriever(SOURCE_PATH)

# Query only once the user has typed something: an empty query would still
# be embedded and searched, for a result that is never displayed.
if text:
    retrieved_docs = retriever.invoke(text)
    if retrieved_docs:
        st.text(retrieved_docs[0].page_content)
    else:
        # Indexing [0] on an empty result list would raise IndexError.
        st.info('no matching passage found')