rchrdgwr committed on
Commit
aba4ee7
·
1 Parent(s): 0fbd1a9

file clean up

Browse files
Files changed (2) hide show
  1. app.py +3 -1
  2. richard/text_utils.py +5 -7
app.py CHANGED
@@ -14,6 +14,7 @@ from richard.text_utils import FileLoader
14
  from richard.pipeline import RetrievalAugmentedQAPipeline
15
  # from richard.vector_database import QdrantDatabase
16
  from qdrant_client import QdrantClient
 
17
 
18
  def process_file(file, use_rct):
19
  fileLoader = FileLoader()
@@ -37,6 +38,7 @@ user_role_prompt = UserRolePrompt(user_prompt_template)
37
 
38
  @cl.on_chat_start
39
  async def on_chat_start():
 
40
  res = await cl.AskActionMessage(
41
  content="Do you want to use Qdrant?",
42
  actions=[
@@ -98,7 +100,7 @@ async def on_chat_start():
98
  await msg.send()
99
 
100
  # decide whether to use the dict vector store or the Qdrant vector store
101
- from qdrant_client.models import PointStruct, VectorParams
102
  # Create a dict vector store
103
  if use_qdrant == False:
104
  vector_db = VectorDatabase()
 
14
  from richard.pipeline import RetrievalAugmentedQAPipeline
15
  # from richard.vector_database import QdrantDatabase
16
  from qdrant_client import QdrantClient
17
+ from qdrant_client.models import VectorParams
18
 
19
  def process_file(file, use_rct):
20
  fileLoader = FileLoader()
 
38
 
39
  @cl.on_chat_start
40
  async def on_chat_start():
41
+ # get user inputs
42
  res = await cl.AskActionMessage(
43
  content="Do you want to use Qdrant?",
44
  actions=[
 
100
  await msg.send()
101
 
102
  # decide whether to use the dict vector store or the Qdrant vector store
103
+
104
  # Create a dict vector store
105
  if use_qdrant == False:
106
  vector_db = VectorDatabase()
richard/text_utils.py CHANGED
@@ -1,13 +1,12 @@
1
  import os
2
  from typing import List
3
- import fitz
4
  import tempfile
5
  from aimakerspace.text_utils import CharacterTextSplitter
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
 
8
  # load the file
9
-
10
-
11
 
12
  class FileLoader:
13
 
@@ -23,6 +22,7 @@ class FileLoader:
23
  else:
24
  text_splitter=CharacterTextSplitter()
25
  file_extension = os.path.splitext(file.name)[1].lower()
 
26
  with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=file_extension) as temp_file:
27
  self.temp_file_path = temp_file.name
28
  temp_file.write(file.content)
@@ -36,22 +36,19 @@ class FileLoader:
36
  raise ValueError(
37
  f"Unsupported file type: {self.temp_file_path}"
38
  )
39
- print(self.documents)
40
  return text_splitter.split_text(self.documents)
41
  else:
42
  raise ValueError(
43
  "Not a file"
44
  )
45
 
46
-
47
  def load_text_file(self):
48
  with open(self.temp_file_path, "r", encoding=self.encoding) as f:
49
  self.documents.append(f.read())
50
 
51
  def load_pdf_file(self):
52
- print("load_pdf_file()")
53
  pdf_document = fitz.open(self.temp_file_path)
54
- print(len(pdf_document))
55
  for page_num in range(len(pdf_document)):
56
  page = pdf_document.load_page(page_num)
57
  text = page.get_text()
@@ -85,6 +82,7 @@ class CharacterTextSplitter:
85
 
86
 
87
  class MyRecursiveCharacterTextSplitter:
 
88
  def __init__(
89
  self
90
  ):
 
1
  import os
2
  from typing import List
3
+ import fitz # pymupdf
4
  import tempfile
5
  from aimakerspace.text_utils import CharacterTextSplitter
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
 
8
  # load the file
9
+ # handle .txt and .pdf
 
10
 
11
  class FileLoader:
12
 
 
22
  else:
23
  text_splitter=CharacterTextSplitter()
24
  file_extension = os.path.splitext(file.name)[1].lower()
25
+
26
  with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=file_extension) as temp_file:
27
  self.temp_file_path = temp_file.name
28
  temp_file.write(file.content)
 
36
  raise ValueError(
37
  f"Unsupported file type: {self.temp_file_path}"
38
  )
 
39
  return text_splitter.split_text(self.documents)
40
  else:
41
  raise ValueError(
42
  "Not a file"
43
  )
44
 
 
45
  def load_text_file(self):
46
  with open(self.temp_file_path, "r", encoding=self.encoding) as f:
47
  self.documents.append(f.read())
48
 
49
  def load_pdf_file(self):
50
+ # pymupdf
51
  pdf_document = fitz.open(self.temp_file_path)
 
52
  for page_num in range(len(pdf_document)):
53
  page = pdf_document.load_page(page_num)
54
  text = page.get_text()
 
82
 
83
 
84
  class MyRecursiveCharacterTextSplitter:
85
+ # uses langchain_text_splitters.RecursiveCharacterTextSplitter
86
  def __init__(
87
  self
88
  ):