Spaces:
Running
Running
Aymeric Roucher
committed on
Commit
•
db61c57
1
Parent(s):
37c61d6
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,33 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
iface = gr.Interface(
|
7 |
-
fn=
|
8 |
inputs="text",
|
9 |
outputs=gr.HighlightedText(
|
10 |
-
label="
|
11 |
-
combine_adjacent=
|
12 |
show_legend=True,
|
13 |
-
color_map={"
|
14 |
)
|
15 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
from haystack.nodes import PreProcessor
|
3 |
+
from haystack import Document
|
4 |
|
5 |
+
# Module-level text splitter shared by every request handled by the app.
# NOTE(review): the comments on individual options paraphrase Haystack's
# PreProcessor documentation -- confirm against the installed version.
preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    remove_substrings=None,
    # Split on word counts, 200 words per chunk, no overlap between chunks.
    split_by="word",
    split_length=200,
    split_respect_sentence_boundary=True,
    split_overlap=0,
    # Previously written as `max_chars_check: int = 10_000`, i.e. copied from
    # a function signature; an annotation is a SyntaxError inside a call, so
    # it is passed as a plain keyword argument here.
    max_chars_check=10_000,
)
|
16 |
+
|
17 |
+
def chunk(text):
    """Split *text* into chunks and tag each one for highlighted display.

    The text is wrapped in a ``Document`` and run through the module-level
    ``preprocessor``. Each resulting split is paired with a label that cycles
    through ``"0"``, ``"1"``, ``"2"`` so that neighbouring chunks get
    different labels.

    Args:
        text: Raw input string from the UI.

    Returns:
        A list of ``(chunk_text, label)`` tuples. The text comes first and the
        label is a string, which is the shape ``gr.HighlightedText`` expects
        and matches the string keys used in the interface's ``color_map``.
        An empty list is returned for empty input.
    """
    # Nothing to split; avoid handing an empty document to the preprocessor.
    if not text:
        return []

    splits = preprocessor.process(Document(content=text))

    # The original emitted (i % 3, split.content): label first and as an int,
    # which does not match the (text, label) pairs HighlightedText consumes
    # nor the "0"/"1"/"2" string keys of the colour map.
    return [(split.content, str(i % 3)) for i, split in enumerate(splits)]
|
23 |
|
24 |
# Output component: renders labelled text spans, one colour per label.
_highlighted_output = gr.HighlightedText(
    label="Highlights",
    combine_adjacent=False,
    show_legend=True,
    color_map={"0": "red", "1": "green", "2": "yellow"},
)

# Single text box in, highlighted chunks out; `chunk` does the splitting.
iface = gr.Interface(fn=chunk, inputs="text", outputs=_highlighted_output)
iface.launch()
|