from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import streamlit as st

# Source document and embedding model used to build the search index.
SOURCE_PATH = "state_of_the_union.txt"
EMBEDDING_MODEL = "hkunlp/instructor-base"


@st.cache_resource
def _build_retriever():
    """Build the retriever over the source document, cached across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the embedding model would be reloaded and the whole document
    re-embedded each time the user edits the query.

    Returns:
        The retriever obtained from a Chroma store built over the chunks of
        ``SOURCE_PATH``.

    Raises:
        OSError: If ``SOURCE_PATH`` cannot be opened or read.
    """
    # Context manager closes the handle; the explicit encoding avoids
    # depending on the platform's locale default.
    # NOTE(review): assumes the source file is UTF-8 encoded — confirm.
    with open(SOURCE_PATH, "r", encoding="utf-8") as source_file:
        full_text = source_file.read()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_text(full_text)

    embeddings = HuggingFaceInstructEmbeddings(model_name=EMBEDDING_MODEL)
    db = Chroma.from_texts(texts, embeddings)
    return db.as_retriever()


text = st.text_area('enter some text to start')

# Only query once the user has typed something: an empty query would still
# be embedded and searched, with nothing to display.
if text:
    retriever = _build_retriever()
    retrieved_docs = retriever.invoke(text)
    if retrieved_docs:
        # Show the first document returned by the retriever.
        st.text(retrieved_docs[0].page_content)
    else:
        # Guard against an empty result instead of raising IndexError.
        st.info("No matching passage found.")