code
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
|
|
11 |
|
12 |
model = AutoModelForCausalLM.from_pretrained(
|
13 |
"NyxKrage/Microsoft_Phi-4",
|
14 |
-
device_map="
|
15 |
torch_dtype="auto",
|
16 |
trust_remote_code=True,
|
17 |
)
|
@@ -49,9 +49,9 @@ def respond(
|
|
49 |
max_new_tokens=max_tokens,
|
50 |
temperature=temperature,
|
51 |
streamer=streamer,
|
52 |
-
top_p=top_p,
|
53 |
return_full_text=False,
|
54 |
-
do_sample=
|
55 |
)
|
56 |
|
57 |
response = ""
|
@@ -60,7 +60,7 @@ def respond(
|
|
60 |
thread.start()
|
61 |
# Print the generated text in real-time
|
62 |
for new_text in streamer:
|
63 |
-
yield new_text
|
64 |
|
65 |
with gr.Blocks() as demo:
|
66 |
with gr.Row():
|
@@ -79,7 +79,7 @@ with gr.Blocks() as demo:
|
|
79 |
system_message = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
|
80 |
max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
|
81 |
temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
|
82 |
-
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
|
83 |
seed = gr.Slider(minimum=0, maximum=20091114, value=42, step=1, label="Seed")
|
84 |
|
85 |
with gr.Column():
|
|
|
11 |
|
12 |
model = AutoModelForCausalLM.from_pretrained(
|
13 |
"NyxKrage/Microsoft_Phi-4",
|
14 |
+
device_map="cuda",
|
15 |
torch_dtype="auto",
|
16 |
trust_remote_code=True,
|
17 |
)
|
|
|
49 |
max_new_tokens=max_tokens,
|
50 |
temperature=temperature,
|
51 |
streamer=streamer,
|
52 |
+
# top_p=top_p,
|
53 |
return_full_text=False,
|
54 |
+
do_sample=False,
|
55 |
)
|
56 |
|
57 |
response = ""
|
|
|
60 |
thread.start()
|
61 |
# Print the generated text in real-time
|
62 |
for new_text in streamer:
|
63 |
+
yield tokenizer.decode(new_text, skip_special_tokens=True)
|
64 |
|
65 |
with gr.Blocks() as demo:
|
66 |
with gr.Row():
|
|
|
79 |
system_message = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
|
80 |
max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
|
81 |
temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
|
82 |
+
# top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
|
83 |
seed = gr.Slider(minimum=0, maximum=20091114, value=42, step=1, label="Seed")
|
84 |
|
85 |
with gr.Column():
|