Spaces:
Sleeping
Sleeping
luminoussg
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -30,21 +30,42 @@ def count_tokens(json_file, encoding_name):
|
|
30 |
'token_count': conversation_token_count
|
31 |
})
|
32 |
|
33 |
-
return
|
34 |
|
35 |
# Gradio interface function
|
36 |
-
def token_counter(json_file,
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
# Gradio UI setup
|
41 |
-
gr.
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
gr.
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
'token_count': conversation_token_count
|
31 |
})
|
32 |
|
33 |
+
return token_counts, total_token_count
|
34 |
|
35 |
# Gradio interface function
|
36 |
+
def token_counter(json_file, encoding_with_model):
    """Count tokens in an uploaded file for the encoding chosen in the UI.

    Args:
        json_file: The uploaded file value, passed through unchanged to
            ``count_tokens``.
        encoding_with_model: The dropdown label, e.g.
            ``"o200k_base (gpt-4o, gpt-4o-mini)"``. Only the first
            whitespace-delimited word is used as the encoding name.

    Returns:
        The ``(token_data, total_token_count)`` pair produced by
        ``count_tokens``.
    """
    # Split encoding name and model type from the dropdown input
    encoding_name = encoding_with_model.split()[0]

    # Get token counts
    token_data, total_token_count = count_tokens(json_file, encoding_name)

    return token_data, total_token_count
|
44 |
+
|
45 |
+
# Define the encoding choices with model information.
# Each label begins with the encoding name, followed by the associated
# models in parentheses; token_counter keeps only the first word.
encoding_options = [
    "o200k_base (gpt-4o, gpt-4o-mini)",
    "cl100k_base (gpt-4-turbo, gpt-4, gpt-3.5-turbo, text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large)",
    "p50k_base (Codex models, text-davinci-002, text-davinci-003)",
    "r50k_base (GPT-3 models like davinci)"
]
|
52 |
|
53 |
# Gradio UI setup
# NOTE(review): the original indentation was lost in extraction; the nesting
# below (inputs inside the Row, outputs and event wiring inside the Blocks
# context, launch at module level) is reconstructed -- confirm against app.py.
with gr.Blocks() as app:
    gr.Markdown("# Token Counter for JSON/JSONL Datasets")

    with gr.Row():
        json_input = gr.File(label="Upload JSON/JSONL File")
        encoding_dropdown = gr.Dropdown(
            choices=encoding_options,
            label="Select Encoding",
            value="o200k_base (gpt-4o, gpt-4o-mini)",
        )

    # Output for individual conversation token counts
    conversation_output = gr.JSON(label="Token Counts per Conversation")

    # Output for total token count
    total_output = gr.Number(label="Total Token Count", interactive=False)

    # Link the inputs and outputs to the function.
    # NOTE(review): only the file input has a listener here, so picking a
    # different encoding does not appear to trigger a recount until the file
    # changes -- confirm whether that is intended.
    json_input.change(
        token_counter,
        [json_input, encoding_dropdown],
        [conversation_output, total_output],
    )

# Launch the app
app.launch()
|