m-ric HF staff committed on
Commit
b240482
·
verified ·
1 Parent(s): 93f7595

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -17
app.py CHANGED
@@ -5,7 +5,8 @@ from langchain.text_splitter import (
5
  )
6
 
7
 
8
- def chunk(text, length, splitter_selection):
 
9
  if splitter_selection == "LangChain's CharacterTextSplitter":
10
  text_splitter = CharacterTextSplitter(
11
  separator="",
@@ -16,22 +17,13 @@ def chunk(text, length, splitter_selection):
16
  )
17
  splits = text_splitter.create_documents([text])
18
  text_splits = [split.page_content for split in splits]
19
- elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - vanilla":
20
  text_splitter = RecursiveCharacterTextSplitter(
21
  chunk_size=length,
22
  chunk_overlap=0,
23
  length_function=len,
24
  add_start_index=True,
25
- )
26
- splits = text_splitter.create_documents([text])
27
- text_splits = [split.page_content for split in splits]
28
- elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - with '.'":
29
- text_splitter = RecursiveCharacterTextSplitter(
30
- chunk_size=length,
31
- chunk_overlap=0,
32
- length_function=len,
33
- add_start_index=True,
34
- separators=["\n\n", "\n", ".", " ", ""],
35
  )
36
  splits = text_splitter.create_documents([text])
37
  text_splits = [split.page_content for split in splits]
@@ -76,7 +68,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
76
  label="Chunking method ",
77
  info="How should we split our chunks?",
78
  )
79
- separator_selection = gr.Textbox(value=["\n\n", "\n", ".", " ", ""], label="Separators used in RecursiveCharacterTextSplitter")
 
 
 
80
  length_unit_selection = gr.Dropdown(
81
  choices=[
82
  "Character count",
@@ -96,22 +91,22 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
96
  )
97
  text.change(
98
  fn=chunk,
99
- inputs=[text, slider_count, split_selection],
100
  outputs=out,
101
  )
102
  length_unit_selection.change(
103
  fn=chunk,
104
- inputs=[text, slider_count, split_selection],
105
  outputs=out,
106
  )
107
  split_selection.change(
108
  fn=chunk,
109
- inputs=[text, slider_count, split_selection],
110
  outputs=out,
111
  )
112
  slider_count.change(
113
  fn=chunk,
114
- inputs=[text, slider_count, split_selection],
115
  outputs=out,
116
  )
117
  demo.launch()
 
5
  )
6
 
7
 
8
+ def chunk(text, length, splitter_selection, separators_str):
9
+ separators = separators_str[1:-1].split(", ")
10
  if splitter_selection == "LangChain's CharacterTextSplitter":
11
  text_splitter = CharacterTextSplitter(
12
  separator="",
 
17
  )
18
  splits = text_splitter.create_documents([text])
19
  text_splits = [split.page_content for split in splits]
20
+ elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter":
21
  text_splitter = RecursiveCharacterTextSplitter(
22
  chunk_size=length,
23
  chunk_overlap=0,
24
  length_function=len,
25
  add_start_index=True,
26
+ separators=separators,
 
 
 
 
 
 
 
 
 
27
  )
28
  splits = text_splitter.create_documents([text])
29
  text_splits = [split.page_content for split in splits]
 
68
  label="Chunking method ",
69
  info="How should we split our chunks?",
70
  )
71
+ separator_selection = gr.Textbox(
72
+ value=["\n\n", "\n", ".", " ", ""],
73
+ label="Separators used in RecursiveCharacterTextSplitter",
74
+ )
75
  length_unit_selection = gr.Dropdown(
76
  choices=[
77
  "Character count",
 
91
  )
92
  text.change(
93
  fn=chunk,
94
+ inputs=[text, slider_count, split_selection, separator_selection],
95
  outputs=out,
96
  )
97
  length_unit_selection.change(
98
  fn=chunk,
99
+ inputs=[text, slider_count, split_selection, separator_selection],
100
  outputs=out,
101
  )
102
  split_selection.change(
103
  fn=chunk,
104
+ inputs=[text, slider_count, split_selection, separator_selection],
105
  outputs=out,
106
  )
107
  slider_count.change(
108
  fn=chunk,
109
+ inputs=[text, slider_count, split_selection, separator_selection],
110
  outputs=out,
111
  )
112
  demo.launch()