Futuresony committed
Commit 419b89b · verified · 1 Parent(s): a42f949

Update app.py

Files changed (1)
  1. app.py +38 -35
app.py CHANGED
@@ -4,9 +4,11 @@ from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
 
+# Load the inference client
+client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
 
+# Function to handle the chat interaction
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -14,72 +16,73 @@ def respond(
     max_tokens,
     temperature,
     top_p,
-    top_k=50,  # Added top_k for fine control over generation
-    repetition_penalty=1.2,  # Added to penalize repetitive patterns
 ):
+    # Prepare the system message and chat history
     messages = [{"role": "system", "content": system_message}]
 
     for val in history:
-        if val[0]:
+        if val[0]:  # User's message
             messages.append({"role": "user", "content": val[0]})
-        if val[1]:
+        if val[1]:  # Assistant's response
             messages.append({"role": "assistant", "content": val[1]})
 
+    # Add the new user message
     messages.append({"role": "user", "content": message})
 
     response = ""
 
-    # Streaming response with additional settings
+    # Use `stream=True` to receive token-by-token responses
     for message in client.chat_completion(
-        messages,
+        messages=messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
-        top_k=top_k,  # Added top_k
-        repetition_penalty=repetition_penalty,  # Added repetition_penalty
-        use_cache=True,  # Enables caching for efficiency
+        use_cache=True,  # Enable caching for efficiency
     ):
+        # Append the content of the generated token
         token = message.choices[0].delta.content
-
         response += token
-        yield response
+        yield response  # Stream the response incrementally to the interface
 
 
 """
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+Customize the ChatInterface using Gradio. For details, check Gradio's documentation:
+https://www.gradio.app/docs/chatinterface
 """
 demo = gr.ChatInterface(
-    respond,
+    fn=respond,  # Function to handle the chat
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+        gr.Textbox(
+            value="You are a friendly and helpful assistant.",
+            label="System message",
+        ),  # Default system message
         gr.Slider(
-            minimum=10,
-            maximum=100,
-            value=50,
+            minimum=1,
+            maximum=2048,
+            value=512,
             step=1,
-            label="Top-k (sampling control)",  # New input for top-k
-        ),
+            label="Max new tokens",
+        ),  # Slider to control max token limit
         gr.Slider(
-            minimum=1.0,
+            minimum=0.1,
             maximum=2.0,
-            value=1.2,
+            value=0.7,
             step=0.1,
-            label="Repetition Penalty",  # New input for repetition_penalty
-        ),
+            label="Temperature",
+        ),  # Slider to adjust temperature
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.9,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),  # Slider to adjust top-p
     ],
+    title="AI Chatbot Interface",
+    description="Interact with the AI model using this chat interface. Adjust generation parameters for better control.",
 )
 
-
+# Launch the Gradio interface
 if __name__ == "__main__":
     demo.launch()
-
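
For reference, below is a minimal sketch of the full app.py as it stands after this commit, assembled from the two hunks above. A few points are assumptions rather than part of the commit: the `import gradio as gr` line and the `system_message` parameter sit outside the visible hunks and are taken from the standard Gradio ChatInterface template; the streaming loop variable is renamed to `chunk` so it no longer shadows the `message` argument; the `if token:` guard covers stream deltas with empty content; and `use_cache=True` is left out because `InferenceClient.chat_completion` may not accept that keyword in every `huggingface_hub` version.

import gradio as gr  # assumed import, not visible in the hunks
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# Load the inference client
client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")

# Function to handle the chat interaction
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,  # assumed: this line falls between the two hunks
    max_tokens,
    temperature,
    top_p,
):
    # Prepare the system message and chat history
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:  # User's message
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # Assistant's response
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the new user message
    messages.append({"role": "user", "content": message})

    response = ""

    # Use `stream=True` to receive token-by-token responses;
    # `chunk` avoids shadowing the `message` argument (the commit reuses the name)
    for chunk in client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # defensive: a delta can carry no content
            response += token
            yield response  # Stream the response incrementally to the interface

demo = gr.ChatInterface(
    fn=respond,  # Function to handle the chat
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful assistant.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="AI Chatbot Interface",
    description="Interact with the AI model using this chat interface. Adjust generation parameters for better control.",
)

# Launch the Gradio interface
if __name__ == "__main__":
    demo.launch()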