TypeError: _load_sbert_model() got an unexpected keyword argument 'token'

#30
by QWOWO - opened

image.png

Attempts:
import streamlit as st # type: ignore
from PyPDF2 import PdfReader # type: ignore
from langchain.text_splitter import RecursiveCharacterTextSplitter # type: ignore
from langchain.embeddings import HuggingFaceInstructEmbeddings # type: ignore
from langchain.vectorstores import FAISS # type: ignore
from dotenv import load_dotenv # type: ignore
from InstructorEmbedding import INSTRUCTOR # type: ignore
from sentence_transformers import SentenceTransformer # Use SentenceTransformer module to use Hugging face Model
#import torch

# Sidebar: app title plus a short "About" blurb rendered as markdown.
# The calls must be indented under the `with` so they render inside the
# sidebar container (and so the file parses at all).
with st.sidebar:
    st.title('LLM Chat App')
    st.markdown('''
## About
This app is an LLM-powered chatbot built using:
- Streamlit
- Langchain
- HuggingFace
''')

def main():
    """Render the "Chat with PDF" page and index the uploaded PDF.

    Steps: load environment variables, show the upload widget, read the
    text of every page of the uploaded PDF, split it into overlapping
    chunks, and build a FAISS vector store from those chunks using the
    ``hkunlp/instructor-xl`` instructor embeddings.

    Returns early (without building an index) when no file has been
    uploaded yet or when the PDF yields no text.
    """
    load_dotenv()
    st.header("Chat with PDF 📄💬")

    # upload PDF file
    pdf = st.file_uploader("Upload your PDF", type='pdf')

    # On the first run no document has been uploaded yet; nothing to do.
    if pdf is None:
        return

    pdf_reader = PdfReader(pdf)

    # Concatenate the text of every page. `or ""` guards against a page
    # for which no text could be extracted.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)

    # Building a vector store from zero texts cannot work, so stop here
    # with a visible message instead of failing inside FAISS.
    if not chunks:
        st.warning("No extractable text found in the uploaded PDF.")
        return

    # FAISS.from_texts expects an embeddings *object* (it calls the model
    # itself), not a list of precomputed vectors, and the classmethod is
    # `from_texts`, not `from_text`. The LangChain wrapper also takes care
    # of pairing each chunk with its instruction, which the hand-rolled
    # `model.encode([instruction, chunk])` did not do.
    #
    # NOTE: the "_load_sbert_model() got an unexpected keyword argument
    # 'token'" error reported for this script comes from loading the
    # INSTRUCTOR model with a mismatched sentence-transformers install;
    # make sure the environment really has sentence-transformers==2.2.2.
    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")

    # Initialize vector store
    vectorstore = FAISS.from_texts(chunks, embedding=embeddings)

# Script entry point. The dunder underscores are required: a bare
# `name == 'main'` raises NameError because `name` is never defined.
if __name__ == '__main__':
    main()

requirements.txt :
langchain==0.0.154
pyPDF2==3.0.1
python-dotenv==1.0.0
streamlit==1.18.1
faiss-cpu==1.7.4
streamlit-extras
altair==4.1.0
huggingface-hub==0.14.1
InstructorEmbedding==1.0.1
sentence-transformers==2.2.2

using python 3.9

Having the same problem. Have you solved it?

For me playing around with the versions of the libs solved the issue.

hey which version of the libs solved the issue?

For me playing around with the versions of the libs solved the issue.

Bro, please tell me which versions of the libs worked for you.

I don't remember exactly, but it seems to be the sentence-transformers version.
Try sentence-transformers==2.2.2

Discussed here: https://github.com/PromtEngineer/localGPT/issues/722

Sign up or log in to comment