Aymeric Roucher committed on
Commit
577bd8b
·
verified ·
1 Parent(s): d830b97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -53,7 +53,7 @@ def chunk(text, words, splitter_selection, slider_overlap):
53
  ),
54
  )
55
  text_splits = [split.content for split in splits]
56
- elif splitter_selection == "Character":
57
  text_splitter = CharacterTextSplitter(
58
  separator="",
59
  chunk_size=words,
@@ -63,7 +63,16 @@ def chunk(text, words, splitter_selection, slider_overlap):
63
  )
64
  splits = text_splitter.create_documents([text])
65
  text_splits = [split.page_content for split in splits]
66
- elif splitter_selection == "Recursive Character Text Splitter":
 
 
 
 
 
 
 
 
 
67
  text_splitter = RecursiveCharacterTextSplitter(
68
  chunk_size=words,
69
  chunk_overlap=slider_overlap,
@@ -125,10 +134,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
125
  text = gr.Textbox(label="Your text 🪶", value=ESSAY)
126
  split_selection = gr.Radio(
127
  [
128
- "Word - respect sentence boundaries",
129
- "Word - no sentence boundaries",
130
- "Recursive Character Text Splitter",
131
- "Character",
 
132
  ],
133
  value="Character",
134
  label="Chunking method ",
 
53
  ),
54
  )
55
  text_splits = [split.content for split in splits]
56
+ elif splitter_selection == "LangChain's CharacterTextSplitter":
57
  text_splitter = CharacterTextSplitter(
58
  separator="",
59
  chunk_size=words,
 
63
  )
64
  splits = text_splitter.create_documents([text])
65
  text_splits = [split.page_content for split in splits]
66
+ elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - vanilla":
67
+ text_splitter = RecursiveCharacterTextSplitter(
68
+ chunk_size=words,
69
+ chunk_overlap=slider_overlap,
70
+ length_function=len,
71
+ add_start_index=True,
72
+ )
73
+ splits = text_splitter.create_documents([text])
74
+ text_splits = [split.page_content for split in splits]
75
+ elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - with '.'":
76
  text_splitter = RecursiveCharacterTextSplitter(
77
  chunk_size=words,
78
  chunk_overlap=slider_overlap,
 
134
  text = gr.Textbox(label="Your text 🪶", value=ESSAY)
135
  split_selection = gr.Radio(
136
  [
137
+ "LangChain's CharacterTextSplitter",
138
+ "Langchain's RecursiveCharacterTextSplitter - vanilla",
139
+ "Langchain's RecursiveCharacterTextSplitter - with '.'",
140
+ "Haystack's PreProcessor - Word level, no sentence boundaries",
141
+ "Haystack's PreProcessor - Word level, respect sentence boundaries",
142
  ],
143
  value="Character",
144
  label="Chunking method ",