# testspace / app.py
# johannes123213's picture
# Update app.py
# 9f56d8d
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import streamlit as st
@st.cache_resource
def _build_retriever(path="state_of_the_union.txt"):
    """Build the retriever over the reference document once per server process.

    Reads the text file at *path*, splits it into overlapping chunks, embeds
    the chunks with the ``hkunlp/instructor-base`` model and stores them in a
    Chroma vector store.

    Streamlit re-executes the whole script on every widget interaction, so
    this work is wrapped in ``st.cache_resource`` to avoid reloading the
    embedding model and re-indexing the document on each rerun.

    Args:
        path: Location of the plain-text document to index.

    Returns:
        The retriever obtained from ``Chroma.as_retriever()``.

    Raises:
        OSError: If *path* cannot be opened or read.
    """
    # Context manager guarantees the file handle is closed; the explicit
    # encoding avoids depending on the platform default.
    with open(path, "r", encoding="utf-8") as source_file:
        full_text = source_file.read()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_text(full_text)

    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base")
    db = Chroma.from_texts(texts, embeddings)
    return db.as_retriever()


text = st.text_area('enter some text to start')
retriever = _build_retriever()

# Only query once the user has typed something: an empty query would be
# embedded and searched for nothing, since its result is never displayed.
if text:
    retrieved_docs = retriever.invoke(text)
    if retrieved_docs:
        # Show the content of the first document returned by the retriever.
        st.text(retrieved_docs[0].page_content)
    else:
        # Guard against an empty result list instead of raising IndexError.
        st.warning("No matching passage found.")