update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,47 @@
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
"""
|
5 |
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
6 |
"""
|
7 |
-
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
10 |
def respond(
|
@@ -43,19 +80,15 @@ def respond(
|
|
43 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
44 |
"""
|
45 |
demo = gr.ChatInterface(
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
gr.
|
50 |
-
gr.Slider(minimum=
|
51 |
-
gr.Slider(
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
step=0.05,
|
56 |
-
label="Top-p (nucleus sampling)",
|
57 |
-
),
|
58 |
-
],
|
59 |
)
|
60 |
|
61 |
|
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
3 |
|
4 |
+
import os
|
5 |
+
import time
|
6 |
+
import asyncio
|
7 |
+
from pydantic import BaseModel
|
8 |
+
|
9 |
+
from pipeline import PromptEnhancer
|
10 |
+
|
11 |
"""
|
12 |
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
13 |
"""
|
14 |
+
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
15 |
+
|
16 |
+
|
17 |
+
async def advancedPromptPipeline(payload: "InputPrompt"):
    """Run the PromptEnhancer pipeline on an incoming prompt.

    Enhances ``payload.text`` with the configured OpenAI model and returns a
    dict of metadata: model name, wall-clock time, token counts, an
    approximate USD cost, the original prompt, and the enhanced prompt.

    NOTE(review): ``InputPrompt`` is not defined in this file — the string
    annotation avoids a NameError at definition time; it should be declared
    (e.g. a pydantic ``BaseModel`` with a ``text: str`` field) or imported.

    Args:
        payload: object exposing a ``text`` attribute with the raw prompt.

    Returns:
        dict with keys ``model``, ``elapsed_time``, ``prompt_tokens``,
        ``completion_tokens``, ``approximate_cost``, ``inout_prompt``
        (NOTE(review): likely a typo for "input_prompt", but kept as-is —
        consumers may already depend on this key), and ``advanced_prompt``.

    Raises:
        ValueError: if the configured model has no pricing entry.
    """
    input_prompt = payload.text

    model = "gpt-4o-mini"

    # Per-token USD pricing: (input_cost, output_cost).
    # A lookup table replaces the original if/elif chain, which left
    # i_cost/o_cost unbound (UnboundLocalError) for any other model name.
    pricing = {
        "gpt-4o": (5 / 10**6, 15 / 10**6),
        "gpt-4o-mini": (0.15 / 10**6, 0.6 / 10**6),
    }
    try:
        i_cost, o_cost = pricing[model]
    except KeyError:
        raise ValueError(f"No pricing configured for model: {model}")

    enhancer = PromptEnhancer(model)

    start_time = time.time()
    advanced_prompt = await enhancer.enhance_prompt(input_prompt, perform_eval=False)
    elapsed_time = time.time() - start_time

    return {
        "model": model,
        "elapsed_time": elapsed_time,
        "prompt_tokens": enhancer.prompt_tokens,
        "completion_tokens": enhancer.completion_tokens,
        "approximate_cost": (enhancer.prompt_tokens * i_cost) + (enhancer.completion_tokens * o_cost),
        "inout_prompt": input_prompt,
        "advanced_prompt": advanced_prompt["advanced_prompt"],
    }
|
45 |
|
46 |
|
47 |
def respond(
|
|
|
80 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
81 |
"""
|
82 |
# Gradio chat UI wired to the async prompt-enhancement pipeline.
# The previous `respond` handler and its slider/textbox controls are kept
# below, commented out, for easy rollback.
demo = gr.ChatInterface(
    advancedPromptPipeline,
    # respond,
    # additional_inputs=[
    #     gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    #     gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    #     gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    #     gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    # ],
)
|
93 |
|
94 |
|