seawolf2357 committed
Commit 2db0d53 • 1 Parent(s): d0f2d28

Update app.py

Files changed (1)
  1. app.py +46 -87
app.py CHANGED
@@ -1,97 +1,48 @@
 import gradio as gr
-import aiohttp
+from huggingface_hub import InferenceClient
 import os
-import json
-from collections import deque
-import asyncio
+import requests
 
-TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
-
-if not TOKEN:
-    raise ValueError("API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable.")
-
-print(f"API Token: {TOKEN[:5]}...{TOKEN[-5:]}")  # Check API token
-
-memory = deque(maxlen=10)
-
-async def test_api():
-    headers = {"Authorization": f"Bearer {TOKEN}"}
-    async with aiohttp.ClientSession() as session:
-        async with session.get("https://api-inference.huggingface.co/models/mistralai/Mistral-Nemo-Instruct-2407", headers=headers) as response:
-            print(f"Test API response: {await response.text()}")
-
-async def respond(
+# Set up the Inference API client
+hf_client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=os.getenv("HF_TOKEN"))
+
+def respond(
     message,
     history: list[tuple[str, str]],
-    system_message="AI Assistant Role",
-    max_tokens=512,
-    temperature=0.7,
-    top_p=0.95,
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
 ):
-    system_prefix = "System: Respond in the same language as the input (English, Korean, Chinese, Japanese, etc.)."
-    full_system_message = f"{system_prefix}{system_message}"
-
-    memory.append((message, None))
-    messages = [{"role": "system", "content": full_system_message}]
-    for val in memory:
+
+    system_prefix = """
+    You must always answer in Korean.
+    """
+
+    messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]  # prepend the system prefix
+
+    for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
 
-    headers = {
-        "Authorization": f"Bearer {TOKEN}",
-        "Content-Type": "application/json"
-    }
-    payload = {
-        "model": "mistralai/Mistral-Nemo-Instruct-2407",
-        "max_tokens": max_tokens,
-        "temperature": temperature,
-        "top_p": top_p,
-        "messages": messages,
-        "stream": True
-    }
-
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.post("https://api-inference.huggingface.co/v1/chat/completions", headers=headers, json=payload) as response:
-                print(f"Response status: {response.status}")
-                if response.status != 200:
-                    error_text = await response.text()
-                    print(f"Error response: {error_text}")
-                    yield "An API response error occurred. Please try again."
-                    return
-
-                response_text = ""
-                async for chunk in response.content:
-                    if chunk:
-                        try:
-                            chunk_data = chunk.decode('utf-8')
-                            response_json = json.loads(chunk_data)
-                            if "choices" in response_json:
-                                content = response_json["choices"][0]["message"]["content"]
-                                response_text += content
-                                yield response_text
-                        except json.JSONDecodeError:
-                            continue
-
-                if not response_text:
-                    yield "I apologize, but I couldn't generate a response. Please try again."
-    except Exception as e:
-        print(f"Exception occurred: {str(e)}")
-        yield f"An error occurred: {str(e)}"
-
-    memory[-1] = (message, response_text)
-
-async def chat(message, history, system_message, max_tokens, temperature, top_p):
-    response = ""
-    try:
-        async for chunk in respond(message, history, system_message, max_tokens, temperature, top_p):
-            response = chunk
-            yield response
-    except Exception as e:
-        print(f"Chat function error: {str(e)}")
-        yield f"An error occurred in the chat function: {str(e)}"
+    messages.append({"role": "user", "content": message})
+
+    response = ""
+
+    # Stream the completion and yield the accumulated text so far
+    for message in hf_client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
+        if token is not None:
+            response += token
+            yield response
 
 theme = "Nymbo/Nymbo_Theme"
 
@@ -102,17 +53,25 @@ footer {
 """
 
 demo = gr.ChatInterface(
-    css=css,
-    fn=chat,
-    theme=theme,
+    respond,
     additional_inputs=[
-        gr.Textbox(value="AI Assistant Role", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Textbox(value="""
+        You are an AI assistant.
+        """, label="System prompt"),
+        gr.Slider(minimum=1, maximum=2000, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ]
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
 )
 
 if __name__ == "__main__":
-    asyncio.run(test_api())  # Run API test
-    demo.queue().launch(max_threads=20, show_error=True)
+    demo.launch()
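
For reference, the streaming call pattern the new respond() relies on can be exercised outside Gradio. This is a minimal sketch, not part of the commit: it assumes huggingface_hub is installed and HF_TOKEN is set in the environment, and the two chat messages are placeholders; the model id and the choices[0].delta.content access match the diff above.

import os
from huggingface_hub import InferenceClient

# Same client setup as the new app.py
client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=os.getenv("HF_TOKEN"))

messages = [
    {"role": "system", "content": "You are an AI assistant."},       # placeholder system prompt
    {"role": "user", "content": "Say hello in one sentence."},       # placeholder user message
]

# With stream=True, chat_completion yields incremental chunks; each chunk's
# choices[0].delta.content carries the newly generated text (or None).
text = ""
for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
    token = chunk.choices[0].delta.content
    if token is not None:
        text += token
print(text)

Inside the app, the same loop yields the growing string instead of printing it, which is what lets gr.ChatInterface render the reply incrementally.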