m-ric HF staff committed on
Commit
333777e
·
verified ·
1 Parent(s): 60ca977

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -40,7 +40,7 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
40
  if splitter_selection == LABEL_TEXTSPLITTER:
41
  text_splitter = CharacterTextSplitter(
42
  chunk_size=length,
43
- chunk_overlap=0,
44
  length_function=length_function,
45
  strip_whitespace=False,
46
  is_separator_regex=False,
@@ -49,7 +49,7 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
49
  elif splitter_selection == LABEL_RECURSIVE:
50
  text_splitter = RecursiveCharacterTextSplitter(
51
  chunk_size=length,
52
- chunk_overlap=0,
53
  length_function=length_function,
54
  strip_whitespace=False,
55
  separators=separators,
@@ -59,7 +59,7 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
59
 
60
  unoverlapped_text_splits = unoverlap_list(text_splits)
61
 
62
- output = [((split[0], 0) if split[1] else (split[0], str(i+1))) for i, split in enumerate(unoverlapped_text_splits)]
63
  print(output)
64
  return output
65
 
@@ -138,10 +138,10 @@ with gr.Blocks(theme=gr.themes.Soft(text_size='lg', font=["monospace"], primary_
138
  info="How should we measure our chunk lengths?",
139
  )
140
  slider_count = gr.Slider(
141
- 20, 500, value=200, label="Chunk length πŸ“", info="In the chosen unit."
142
  )
143
  chunk_overlap = gr.Slider(
144
- 0, 30, value=10, label="Overlap between chunks", info="In the chosen unit."
145
  )
146
  out = gr.HighlightedText(
147
  label="Output",
 
40
  if splitter_selection == LABEL_TEXTSPLITTER:
41
  text_splitter = CharacterTextSplitter(
42
  chunk_size=length,
43
+ chunk_overlap=10,
44
  length_function=length_function,
45
  strip_whitespace=False,
46
  is_separator_regex=False,
 
49
  elif splitter_selection == LABEL_RECURSIVE:
50
  text_splitter = RecursiveCharacterTextSplitter(
51
  chunk_size=length,
52
+ chunk_overlap=10,
53
  length_function=length_function,
54
  strip_whitespace=False,
55
  separators=separators,
 
59
 
60
  unoverlapped_text_splits = unoverlap_list(text_splits)
61
 
62
+ output = [((split[0], '0') if split[1] else (split[0], str(i+1))) for i, split in enumerate(unoverlapped_text_splits)]
63
  print(output)
64
  return output
65
 
 
138
  info="How should we measure our chunk lengths?",
139
  )
140
  slider_count = gr.Slider(
141
+ 20, 500, value=200, step=1, label="Chunk length πŸ“", info="In the chosen unit."
142
  )
143
  chunk_overlap = gr.Slider(
144
+ 0, 30, value=10, step=1, label="Overlap between chunks", info="In the chosen unit."
145
  )
146
  out = gr.HighlightedText(
147
  label="Output",