Spaces:
Runtime error
Runtime error
File size: 1,111 Bytes
46d9645 26ba5f9 46d9645 26ba5f9 46d9645 f9b3ca4 26ba5f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
def read_pdf(file):
    """Load a PDF and return the documents produced by ``PyPDFLoader``.

    ``PyPDFLoader`` is built from a filesystem path, while the caller in this
    script passes the in-memory object returned by ``st.file_uploader``.
    File-like input is therefore written to a temporary ``.pdf`` file first.

    Args:
        file: Either a path (``str`` / ``os.PathLike``) to a PDF on disk, or a
            binary file-like object exposing ``getvalue()`` or ``read()``.

    Returns:
        Whatever ``PyPDFLoader(...).load()`` returns for that PDF.
    """
    import os
    import tempfile

    # A real path can be handed to the loader directly.
    if isinstance(file, (str, os.PathLike)):
        return PyPDFLoader(os.fspath(file)).load()

    # Prefer getvalue(): it returns the whole buffer regardless of the
    # current read position; fall back to read() for plain streams.
    if hasattr(file, "getvalue"):
        payload = file.getvalue()
    else:
        payload = file.read()

    # delete=False so the file can be closed and then reopened by the loader
    # (reopening a still-open temporary file fails on some platforms).
    tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        with tmp:
            tmp.write(payload)
        return PyPDFLoader(tmp.name).load()
    finally:
        # The temporary copy is only needed while loading.
        os.unlink(tmp.name)
# Streamlit page: upload a PDF, index its chunks in FAISS, and show the chunk
# most similar to the user's question.
st.title('PDF Text Extractor')
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
if uploaded_file is not None:
    raw_documents = read_pdf(uploaded_file)
    splitter = SentenceTransformersTokenTextSplitter(
        model_name='dangvantuan/sentence-camembert-large',
        chunk_overlap=50,
    )
    documents = splitter.split_documents(raw_documents)
    if not documents:
        # Nothing to index (e.g. a PDF with no extractable text); building a
        # FAISS index from an empty list would fail.
        st.warning("Aucun texte n'a pu être extrait de ce PDF.")
    else:
        embeddings_fun = HuggingFaceEmbeddings(model_name='dangvantuan/sentence-camembert-large')
        faiss_db = FAISS.from_documents(documents, embeddings_fun)
        query = st.text_input("Entrer une question")
        # The script reruns top to bottom on every interaction, so the text
        # box is empty until the user has actually typed a question.
        if query and query.strip():
            docs = faiss_db.similarity_search(query)
            st.text('La reponse à votre question:')
            if docs:
                st.write(docs[0].page_content)
            else:
                # Avoid an IndexError when the search returns no match.
                st.write("Aucun passage pertinent trouvé.")
|