import gradio as gr
import os

from tokenizer.basic_bpe import BasicTokenizer

# Load the pre-trained BPE tokenizer model for the Hindi dataset.
print("Loading the model...")
model_path = os.path.join(os.getcwd(), "tokenizer_model")
model_path = os.path.join(model_path, "hindi_sentiments_basic.model")

basic_tokenizer = BasicTokenizer()
basic_tokenizer.load(model_path)


def test_tokenizer(text):
    """Encode the input text, decode it back, and map each token id to its text."""
    ids = basic_tokenizer.encode(text)
    decoded = basic_tokenizer.decode(ids)
    mapping = [(str(i), basic_tokenizer.decode([i])) for i in ids]
    return ids, decoded, mapping


with gr.Blocks() as demo:
    gr.HTML('<h1 style="text-align: center;">Token Generation for Hindi Dataset</h1>')
    with gr.Row():
        with gr.Column():
            inputs = [
                gr.TextArea(
                    label="Enter initial text to generate tokens in Hindi",
                    lines=10,
                )
            ]
            generate_btn = gr.Button(value="Generate Tokens")
        with gr.Column():
            enc = gr.Textbox(label="Encoded Tokens")
            txt = gr.Textbox(label="Decoded Text from tokens")
            token_map = gr.Textbox(label="Mapping of the tokens and respective texts")
            outputs = [enc, txt, token_map]

    generate_btn.click(fn=test_tokenizer, inputs=inputs, outputs=outputs)

if __name__ == "__main__":
    demo.launch(share=True)