Spaces:
Running
Running
File size: 3,362 Bytes
9b61800 e20929a 996d8f7 5eb46a1 996d8f7 5eb46a1 996d8f7 330338a 996d8f7 330338a 996d8f7 e20929a 996d8f7 e20929a 9b61800 b994a2b bc37b37 642315a 1e1d532 e20929a 1453344 1e1d532 b994a2b e20929a b994a2b bc37b37 1e1d532 e20929a 9b61800 996d8f7 642315a 9b61800 b994a2b e20929a 76c896a 9b61800 29734be 9b61800 29734be 9b61800 29734be 9b61800 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
'''
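Description : Gradio demo that predicts the optimal vocabulary size from the number of non-vocabulary parameters and an optional FLOPs budget.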
Version : 1.0
Author : Chaofan Tao
Mail : tcftrees@gmail.com
Github : https://github.com/sail-sg/scaling-with-vocab
Date : 2024-08-09 00:25
Copyright (C) 2024 Chaofan Tao. All rights reserved.
'''
import gradio as gr
from utils import approach1_isoflops, approach2_derivative, approach3_isoloss
# Upper bound on the bit length of an integer power computed while parsing
# user input. Anything larger overflows a float anyway, so it is rejected
# up front instead of being computed.
_MAX_POW_BITS = 4096


def _parse_number(text):
    """Parse a number or a simple arithmetic expression into a float.

    Accepts numeric literals such as ``"7000000000"`` or ``"7.05e21"`` and
    arithmetic built from ``+ - * / **`` and parentheses, e.g. ``"7*10**9"``.
    Names, calls, attribute access and every other construct are rejected.

    Args:
        text: A string to parse, or an ``int``/``float`` that is passed
            through unchanged (converted to ``float``).

    Returns:
        The value of the expression as a ``float``.

    Raises:
        ValueError: If ``text`` is not a supported numeric expression or
            cannot be evaluated (division by zero, overflow, ...).
    """
    # SECURITY: this replaces a direct eval() of text typed into a web form,
    # which allowed arbitrary code execution. Only arithmetic on numeric
    # constants is evaluated here.
    import ast
    import operator

    if isinstance(text, (int, float)) and not isinstance(text, bool):
        return float(text)
    if not isinstance(text, str):
        raise ValueError(f"unsupported input type: {type(text).__name__}")

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is a subclass of int; "True"/"False" are not numbers here.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError("only numeric constants are allowed")
            return value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and right > 0
                and left.bit_length() * right > _MAX_POW_BITS
            ):
                # Avoid building an enormous integer from a short input.
                raise OverflowError("exponentiation result is too large")
            return binary_ops[type(node.op)](left, right)
        raise ValueError("unsupported expression")

    try:
        # strip(): compile() in "eval" mode rejects leading whitespace.
        tree = ast.parse(text.strip(), mode="eval")
        return float(evaluate(tree.body))
    except (
        SyntaxError,
        TypeError,
        ZeroDivisionError,
        OverflowError,
        RecursionError,
        MemoryError,
    ) as exc:
        raise ValueError(f"invalid numeric expression: {text!r}") from exc


def compute_optimal_vocab(Nnv, flops):
    """Predict the optimal vocabulary size and format it for display.

    Without a FLOPs budget all three approaches are evaluated on ``Nnv``.
    With a FLOPs budget only approach 3 is evaluated (it is the only one
    called with ``flops``); approaches 1 and 2 are reported as ``None``.

    Args:
        Nnv: Non-vocabulary parameter count, as a number or a string holding
            a numeric literal or simple arithmetic expression
            (e.g. ``"7*10**9"``).
        flops: Optional FLOPs budget in the same format. ``None`` or a blank
            string means no budget was provided.

    Returns:
        A multi-line string with the prediction of each approach, or an
        error message naming the field that could not be parsed.
    """
    try:
        Nnv = _parse_number(Nnv)
    except ValueError:
        return "Invalid input for Non-vocabulary Parameters."

    # A whitespace-only textbox counts as "not provided".
    flops_given = flops is not None and str(flops).strip() != ""

    if flops_given:
        try:
            flops = _parse_number(flops)
        except ValueError:
            return "Invalid input for FLOPs."
        Vopt_app1, Vopt_app2 = None, None
        Vopt_app3 = approach3_isoloss(Nnv, flops)
    else:
        Vopt_app1 = approach1_isoflops(Nnv)
        Vopt_app2 = approach2_derivative(Nnv)
        Vopt_app3 = approach3_isoloss(Nnv)

    results = f"The optimal vocabulary size is:\nApproach 1: {Vopt_app1}\nApproach 2: {Vopt_app2}\nApproach 3: {Vopt_app3}"
    return results
# Introductory text shown at the top of the page (HTML headings wrapping
# Markdown bullet points). Kept flush-left so the rendered text is unchanged.
_INTRO_MARKDOWN = """<h1>The Optimal Vocabulary Size Predictor</h1>
<h4>
This tool is used to predict the optimal vocabulary size given the non-vocabulary parameters.
We provide 3 ways for prediction:
- **Approach 1: Build the relationship between studied attributes and FLOPs**: Build the relationship between the optimal data points (the points that reach the lowest loss under the same FLOPs budget) and the FLOPs.
- **Approach 2: Derivative-Based Estimation**: Fast calculation method using the derivative of FLOPs with respect to the vocabulary size.
- **Approach 3: Parametric Fit of Loss Formula**: Design a loss formula that considers the effect of vocabulary size and utilizes the loss to make prediction.
Approach 1 and 2 can only be used to compute the optimal vocabulary size when the compute is optimally allocated to non-vocabulary parameters, vocabulary parameters and data jointly.
Approach 3 will not only consider the case above, but also consider the case when the amount of data does not satisfy the optimal compute allocation, and can calculate the optimal vocabulary size with specified FLOPs.
**Thanks for trying** πππ!
</h4>
"""

# NOTE(review): the layout nesting below is reconstructed from statement
# order; confirm it matches the intended page layout.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown(_INTRO_MARKDOWN)

    # Two inputs and the prediction output.
    with gr.Row():
        Nnv = gr.Textbox(
            label="Non-vocabulary Parameters",
            value=str(7_000_000_000),
        )
        flops = gr.Textbox(
            label="FLOPs",
            placeholder="Optional (e.g. 7.05e21)",
        )
        output_text = gr.Textbox(label="Prediction")

    # Clicking the button runs the predictor on both textboxes and writes
    # the returned string into the prediction box.
    with gr.Row():
        btn = gr.Button("Press it to compute the optimal vocabulary size")
        btn.click(
            fn=compute_optimal_vocab,
            inputs=[Nnv, flops],
            outputs=output_text,
        )

demo.launch()
# import gradio as gr
# def update(name):
# return f"Welcome to Gradio, {name}!"
# with gr.Blocks() as demo:
# gr.Markdown("Start typing below and then click **Run** to see the output.")
# with gr.Row():
# inp = gr.Textbox(placeholder="What is your name?")
# out = gr.Textbox()
# btn = gr.Button("Run")
# btn.click(fn=update, inputs=inp, outputs=out)
# demo.launch()
|