"""Gradio demo that visualises how Haystack's PreProcessor splits a text."""

import gradio as gr
from haystack import Document
from haystack.nodes import PreProcessor

# Number of distinct highlight colours cycled through; must match the keys
# of COLOR_MAP below.
NUM_COLORS = 3

# HighlightedText looks labels up by string key, so the labels produced by
# `chunk` are strings as well.
COLOR_MAP = {"0": "red", "1": "green", "2": "yellow"}

preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    remove_substrings=None,
    split_by="word",
    split_length=200,
    split_respect_sentence_boundary=True,
    split_overlap=0,
    # Was written as `max_chars_check: int = 10_000`, which is a parameter
    # declaration rather than a keyword argument and does not parse.
    max_chars_check=10_000,
)


def chunk(text: str):
    """Split *text* with the module-level preprocessor and label each split.

    Args:
        text: Raw text entered in the Gradio textbox.

    Returns:
        A list of ``(split_text, label)`` tuples in split order, where
        ``label`` is ``"0"``, ``"1"`` or ``"2"`` cycling with the split
        index so that neighbouring splits get different colours. An empty
        list is returned for empty or whitespace-only input.
    """
    # Nothing to split; skip the preprocessor entirely for blank input.
    if not text or not text.strip():
        return []

    splits = preprocessor.process(Document(content=text))
    # HighlightedText expects (text, label) pairs; the label is stringified
    # so that it matches the COLOR_MAP keys.
    return [
        (split.content, str(index % NUM_COLORS))
        for index, split in enumerate(splits)
    ]


iface = gr.Interface(
    fn=chunk,
    inputs="text",
    outputs=gr.HighlightedText(
        label="Highlights",
        combine_adjacent=False,
        show_legend=True,
        color_map=COLOR_MAP,
    ),
)

# Launched at import time, as in the original script.
iface.launch()