NEXAS committed on
Commit
c2b16ec
·
verified ·
1 Parent(s): b28f380

Update src/utils/ingest_text.py

Browse files
Files changed (1) hide show
  1. src/utils/ingest_text.py +6 -2
src/utils/ingest_text.py CHANGED
@@ -8,6 +8,9 @@ from langchain_community.document_loaders.directory import DirectoryLoader
8
  import os
9
  from fastembed import TextEmbedding
10
  from typing import List
 
 
 
11
 
12
  import nest_asyncio
13
  nest_asyncio.apply()
@@ -18,8 +21,8 @@ llamaparse_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
18
  groq_api_key = os.getenv("GROQ_API_KEY")
19
 
20
 
21
- parsed_data_file = r".\data\parsed_data.pkl"
22
- output_md = r".\data\output.md"
23
  loki = r"data"
24
 
25
  import pickle
@@ -61,6 +64,7 @@ def create_vector_database(loc):
61
 
62
  """
63
  # Call the function to either load or parse the data
 
64
  llama_parse_documents = load_or_parse_data(loc)
65
  #print(llama_parse_documents[1].text[:100])
66
 
 
8
  import os
9
  from fastembed import TextEmbedding
10
  from typing import List
11
+ import nltk
12
+ nltk.download('punkt')
13
+
14
 
15
  import nest_asyncio
16
  nest_asyncio.apply()
 
21
  groq_api_key = os.getenv("GROQ_API_KEY")
22
 
23
 
24
+ parsed_data_file = r"data/parsed_data.pkl"
25
+ output_md = r"data/output.md"
26
  loki = r"data"
27
 
28
  import pickle
 
64
 
65
  """
66
  # Call the function to either load or parse the data
67
+ print("text_db")
68
  llama_parse_documents = load_or_parse_data(loc)
69
  #print(llama_parse_documents[1].text[:100])
70