"""Streamlit demo: semantic search over a local text file.

The script splits ``state_of_the_union.txt`` into overlapping chunks, embeds
them with an Instructor model, indexes them in a Chroma vector store, and
shows the chunks most similar to the text the user types in.
"""

from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import streamlit as st

SOURCE_PATH = "state_of_the_union.txt"
EMBEDDING_MODEL = "hkunlp/instructor-base"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100


@st.cache_resource
def build_retriever(path=SOURCE_PATH, model_name=EMBEDDING_MODEL):
    """Build a retriever over the chunks of the text file at *path*.

    Streamlit re-executes the whole script on every widget interaction, so
    the (expensive) model load and index build are cached across reruns.

    Args:
        path: Text file to index.
        model_name: Hugging Face model name passed to the embedding class.

    Returns:
        The retriever produced by ``Chroma.as_retriever()``.
    """
    # Context manager closes the handle; the original leaked it.
    # NOTE(review): assumes the file is UTF-8 encoded - confirm.
    with open(path, "r", encoding="utf-8") as source_file:
        full_text = source_file.read()

    text_splitter = CharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    texts = text_splitter.split_text(full_text)

    embeddings = HuggingFaceInstructEmbeddings(model_name=model_name)
    db = Chroma.from_texts(texts, embeddings)
    return db.as_retriever()


# The original line read `st.'enter some text to start'`, which is a syntax
# error; a single-line text input is the presumed intent.
text = st.text_input("enter some text to start")

# Only query once the user has typed something, so no empty-string lookup is
# embedded and searched on the initial page load.
if text:
    retrieved_docs = build_retriever().invoke(text)
    # Convert the returned documents to plain dicts so st.json receives
    # JSON-serializable data rather than Document objects.
    st.json(
        [
            {"page_content": doc.page_content, "metadata": doc.metadata}
            for doc in retrieved_docs
        ]
    )