"""Gradio demo comparing how three tokenizers split the same input text.

The text entered in the UI is tokenized with the spaCy English tokenizer,
the DeBERTa-V3 tokenizer and the Mistral tokenizer, and each token list is
shown in its own JSON panel.
"""
import os  # noqa: F401 - retained from the original import list
import subprocess
import sys
from typing import List, Tuple

import gradio as gr
from spacy.lang.en import English
from transformers import AutoTokenizer

# download spacy model ---
# Run the download with the interpreter that is executing this script, so the
# package lands in the same environment (a bare "python" on PATH may point
# somewhere else). The exit status is deliberately ignored (check=False): a
# failed download must not stop the app from starting.
# NOTE(review): the tokenizer below is built from the blank `English` class,
# so this download may be unnecessary -- confirm before removing it.
subprocess.run(
    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
    check=False,
)

deberta_v3_tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
en_tokenizer = English().tokenizer


def tokenize_with_spacy(text: str, tokenizer=en_tokenizer) -> List[str]:
    """Return the text of every token produced by a spaCy-style tokenizer.

    Args:
        text: The string to tokenize.
        tokenizer: A callable that returns an iterable of objects exposing a
            ``.text`` attribute. Defaults to the module-level English
            tokenizer.

    Returns:
        The ``.text`` of each token, in order.
    """
    return [token.text for token in tokenizer(text)]


def tokenize_with_hf(text: str, tokenizer=deberta_v3_tokenizer) -> List[str]:
    """Return the result of ``tokenizer.tokenize(text)``.

    Args:
        text: The string to tokenize.
        tokenizer: An object with a ``tokenize`` method. Defaults to the
            module-level DeBERTa-V3 tokenizer.

    Returns:
        Whatever ``tokenizer.tokenize`` returns (a list of token strings for
        Hugging Face tokenizers).
    """
    return tokenizer.tokenize(text)


def tokenize(text: str) -> Tuple[List[str], List[str], List[str]]:
    """Tokenize ``text`` with all three tokenizers.

    Args:
        text: The string to tokenize.

    Returns:
        A ``(spacy_tokens, deberta_tokens, mistral_tokens)`` tuple, in the
        order expected by the output components wired up below.
    """
    spacy_tokens = tokenize_with_spacy(text)
    deberta_tokens = tokenize_with_hf(text)
    mistral_tokens = tokenize_with_hf(text, tokenizer=mistral_tokenizer)
    return spacy_tokens, deberta_tokens, mistral_tokens


with gr.Blocks() as demo:
    input_text = gr.Textbox(lines=2, placeholder="Input text...")
    submit_btn = gr.Button("Submit")

    # One JSON panel per tokenizer.
    spacy_display = gr.JSON(label="Spacy")
    deb_display = gr.JSON(label="DeBERTa-V3")
    mistral_display = gr.JSON(label="Mistral")

    # callback ---
    # The output order must match the tuple returned by `tokenize`.
    submit_btn.click(
        fn=tokenize,
        inputs=input_text,
        outputs=[spacy_display, deb_display, mistral_display],
    )

# launch app --------
# Guarded so that importing this module does not start the server.
if __name__ == "__main__":
    demo.launch()