Spaces:

Lyte
/

Any-Tokenizer-Count

Sleeping

File size: 1,108 Bytes
import gradio as gr
from transformers import AutoTokenizer
from huggingface_hub import HfApi
from gradio_huggingfacehub_search import HuggingfaceHubSearch

def count_tokens(model_id, text):
    """Count the tokens ``text`` produces with the tokenizer of ``model_id``.

    The tokenizer is loaded with ``AutoTokenizer.from_pretrained`` and the
    text is encoded with ``encode`` using its default arguments, so the count
    is whatever that default encoding yields for the chosen tokenizer.

    Args:
        model_id: Identifier of the model repo whose tokenizer should be
            used. Leading/trailing whitespace is ignored.
        text: The text to tokenize.

    Returns:
        ``"Number of tokens: <n>"`` on success, otherwise a string starting
        with ``"Error: "`` describing what went wrong. This function never
        raises, so the result can always be shown directly in the UI.
    """
    # Normalise the repo id; the UI can submit an empty value when the user
    # clicks the button before picking a model.
    repo_id = model_id.strip() if isinstance(model_id, str) else model_id
    if not repo_id:
        return "Error: Please select a model repo before counting tokens."

    try:
        tokenizer = AutoTokenizer.from_pretrained(repo_id)
        token_count = len(tokenizer.encode(text))
    except Exception as e:
        # UI boundary: report any loading/encoding failure as text instead of
        # letting it propagate out of the event handler.
        return f"Error: {e}"

    return f"Number of tokens: {token_count}"

# Assemble the single-page UI inside a Blocks context named ``iface``.
with gr.Blocks() as iface:
    gr.Markdown(value="# Universal Tokenizer - Token Counter")
    gr.Markdown(
        value=(
            "This app counts the number of tokens in the provided text "
            "using any tokenizer from a Hugging Face model."
        )
    )

    # Hub search widget; its value is handed to count_tokens as ``model_id``.
    model_id = HuggingfaceHubSearch(
        search_type="model",
        label="Select a model repo with a tokenizer",
        placeholder="Search for a model on Hugging Face",
    )

    # Free-text input and the box that displays the handler's return string.
    text_input = gr.Textbox(
        placeholder="Enter your text here...",
        lines=5,
    )
    output = gr.Textbox(label="Result")

    # Clicking the button calls count_tokens(model_id, text_input) and writes
    # the returned string into ``output``.
    btn = gr.Button(value="Count Tokens")
    btn.click(
        count_tokens,
        [model_id, text_input],
        output,
    )

# Launch the app.
iface.launch()