# Hugging Face Space (status: Running)
import gradio as gr | |
from haystack.nodes import PreProcessor | |
from haystack import Document | |
# Module-level PreProcessor used by the chunking function below.
# Configured to split by word into pieces of up to 200 units with no overlap,
# with the sentence-boundary option enabled and the three clean-up flags on.
preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    remove_substrings=None,
    split_by="word",
    split_length=200,
    split_respect_sentence_boundary=True,
    split_overlap=0,
    # Was written as `max_chars_check: int = 10_000` (a signature-style
    # annotation), which is a syntax error inside a call expression.
    max_chars_check=10_000,
)
def chunk(text):
    """Split *text* with the module-level ``preprocessor`` and label the pieces.

    Args:
        text: Raw input string; it is wrapped in a ``Document`` before being
            handed to ``preprocessor.process``.

    Returns:
        A list of ``(piece_text, label)`` tuples, one per split, in order.
        ``label`` cycles through the strings ``"0"``, ``"1"``, ``"2"`` so that
        consecutive pieces get different labels.
    """
    splits = preprocessor.process(Document(text))
    # Text first, label second, and the label as a string: the previous
    # (int_label, text) order put the index where the text belongs and never
    # matched string keys such as "0"/"1"/"2" in a colour map.
    return [(split.content, str(i % 3)) for i, split in enumerate(splits)]
# Output component: highlighted text with a legend and a fixed colour per
# label key ("0", "1", "2"); adjacent spans are not merged.
highlighted_output = gr.HighlightedText(
    label="Highlights",
    combine_adjacent=False,
    show_legend=True,
    color_map={"0": "red", "1": "green", "2": "yellow"},
)

# Wire the chunking function to a plain text input and the component above,
# then start the app.
iface = gr.Interface(fn=chunk, inputs="text", outputs=highlighted_output)
iface.launch()