# Page-view residue (commit metadata), kept as comments so the file parses:
# Aymeric Roucher
# Update app.py
# db61c57
# raw
# history blame
# 776 Bytes
import gradio as gr
from haystack.nodes import PreProcessor
from haystack import Document
# Shared splitter used by `chunk`: cleans the text, then cuts it into pieces
# of at most 200 words without overlap, keeping sentence boundaries intact.
preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    remove_substrings=None,
    split_by="word",
    split_length=200,
    split_respect_sentence_boundary=True,
    split_overlap=0,
    # Was written as `max_chars_check: int = 10_000`, which is a syntax error
    # inside a call; keyword arguments take a plain `name=value`.
    max_chars_check=10_000,
)
def chunk(text: str):
    """Split *text* into chunks and label them for highlighted display.

    The text is wrapped in a ``Document`` and run through the module-level
    ``preprocessor``. Each resulting split is paired with a label that cycles
    through "0", "1", "2" so consecutive chunks get different labels.

    Args:
        text: Raw text to split.

    Returns:
        A list of ``(chunk_text, label)`` tuples, one per split, in order.
    """
    splits = preprocessor.process(Document(text))
    # HighlightedText consumes (text, label) pairs, and the interface's
    # color_map is keyed by strings, so the text goes first and the label
    # is stringified.
    return [(split.content, str(i % 3)) for i, split in enumerate(splits)]
# Colors for the three highlight categories shown in the output component.
_CHUNK_COLORS = {"0": "red", "1": "green", "2": "yellow"}

# Output component that renders the result of `chunk` with a legend.
_highlighted_output = gr.HighlightedText(
    label="Highlights",
    combine_adjacent=False,
    show_legend=True,
    color_map=_CHUNK_COLORS,
)

# Text box in, highlighted chunks out; start serving immediately.
iface = gr.Interface(fn=chunk, inputs="text", outputs=_highlighted_output)
iface.launch()