Standard_Intelligence_Dev

Sleeping

heymenn committed on Apr 19, 2024

Commit

0a6c112

verified ·

1 Parent(s): a1f97d1

Update split_files_to_excel.py

Files changed (1) hide show

split_files_to_excel.py CHANGED Viewed

@@ -26,6 +26,7 @@ from pypdf import PdfReader
 import pandas as pd
 MODEL = "thenlper/gte-base"
 CHUNK_SIZE = 1000
 CHUNK_OVERLAP = 200
@@ -35,11 +36,15 @@ embeddings = HuggingFaceEmbeddings(
     cache_folder=os.getenv("SENTENCE_TRANSFORMERS_HOME")
 )
 model_id = "mistralai/Mistral-7B-Instruct-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(
     model_id,
-    padding_side="left"
 )
 text_splitter = CharacterTextSplitter(

 import pandas as pd
 MODEL = "thenlper/gte-base"
 CHUNK_SIZE = 1000
 CHUNK_OVERLAP = 200
     cache_folder=os.getenv("SENTENCE_TRANSFORMERS_HOME")
 )
 model_id = "mistralai/Mistral-7B-Instruct-v0.1"
+access_token = os.getenv("HUGGINGFACE_SPLITFILES_API_KEY")
 tokenizer = AutoTokenizer.from_pretrained(
     model_id,
+    padding_side="left",
+    token = access_token
 )
 text_splitter = CharacterTextSplitter(