Spaces:
Running
Running
Aymeric Roucher
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -53,7 +53,7 @@ def chunk(text, words, splitter_selection, slider_overlap):
|
|
53 |
),
|
54 |
)
|
55 |
text_splits = [split.content for split in splits]
|
56 |
-
elif splitter_selection == "
|
57 |
text_splitter = CharacterTextSplitter(
|
58 |
separator="",
|
59 |
chunk_size=words,
|
@@ -63,7 +63,16 @@ def chunk(text, words, splitter_selection, slider_overlap):
|
|
63 |
)
|
64 |
splits = text_splitter.create_documents([text])
|
65 |
text_splits = [split.page_content for split in splits]
|
66 |
-
elif splitter_selection == "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
text_splitter = RecursiveCharacterTextSplitter(
|
68 |
chunk_size=words,
|
69 |
chunk_overlap=slider_overlap,
|
@@ -125,10 +134,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
125 |
text = gr.Textbox(label="Your text 🪶", value=ESSAY)
|
126 |
split_selection = gr.Radio(
|
127 |
[
|
128 |
-
"
|
129 |
-
"
|
130 |
-
"
|
131 |
-
"
|
|
|
132 |
],
|
133 |
value="Character",
|
134 |
label="Chunking method ",
|
|
|
53 |
),
|
54 |
)
|
55 |
text_splits = [split.content for split in splits]
|
56 |
+
elif splitter_selection == "LangChain's CharacterTextSplitter":
|
57 |
text_splitter = CharacterTextSplitter(
|
58 |
separator="",
|
59 |
chunk_size=words,
|
|
|
63 |
)
|
64 |
splits = text_splitter.create_documents([text])
|
65 |
text_splits = [split.page_content for split in splits]
|
66 |
+
elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - vanilla":
|
67 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
68 |
+
chunk_size=words,
|
69 |
+
chunk_overlap=slider_overlap,
|
70 |
+
length_function=len,
|
71 |
+
add_start_index=True,
|
72 |
+
)
|
73 |
+
splits = text_splitter.create_documents([text])
|
74 |
+
text_splits = [split.page_content for split in splits]
|
75 |
+
elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - with '.'":
|
76 |
text_splitter = RecursiveCharacterTextSplitter(
|
77 |
chunk_size=words,
|
78 |
chunk_overlap=slider_overlap,
|
|
|
134 |
text = gr.Textbox(label="Your text 🪶", value=ESSAY)
|
135 |
split_selection = gr.Radio(
|
136 |
[
|
137 |
+
"LangChain's CharacterTextSplitter",
|
138 |
+
"Langchain's RecursiveCharacterTextSplitter - vanilla",
|
139 |
+
"Langchain's RecursiveCharacterTextSplitter - with '.'"
|
140 |
+
"Haystack's PreProcessor - Word level, no sentence boundaries",
|
141 |
+
"Haystack's PreProcessor - Word level, respect sentence boundaries",
|
142 |
],
|
143 |
value="Character",
|
144 |
label="Chunking method ",
|