as-cle-bert committed on
Commit
79cb6aa
1 Parent(s): d96e8ed

Rename app.py to space.py

Browse files
Files changed (1) hide show
  1. app.py → space.py +10 -10
app.py → space.py RENAMED
@@ -1,11 +1,13 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
9
 
10
  def respond(
11
  message,
@@ -26,8 +28,9 @@ def respond(
26
  messages.append({"role": "user", "content": message})
27
 
28
  response = ""
29
-
30
- for message in client.chat_completion(
 
31
  messages,
32
  max_tokens=max_tokens,
33
  stream=True,
@@ -39,9 +42,6 @@ def respond(
39
  response += token
40
  yield response
41
 
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
  demo = gr.ChatInterface(
46
  respond,
47
  additional_inputs=[
 
1
  import gradio as gr
2
+ import huggingface_hub
3
+ from openai import OpenAI
4
 
5
+ TOKEN = os.getenv("HF_TOKEN")
 
 
 
6
 
7
+ client = OpenAI(
8
+ base_url="https://api-inference.huggingface.co/v1/",
9
+ api_key=huggingface_hub.get_token(),
10
+ )
11
 
12
  def respond(
13
  message,
 
28
  messages.append({"role": "user", "content": message})
29
 
30
  response = ""
31
+
32
+ for message in client.chat.completions.create(
33
+ model="meta-llama/Meta-Llama-3.1-405B-Instruct-FP8",
34
  messages,
35
  max_tokens=max_tokens,
36
  stream=True,
 
42
  response += token
43
  yield response
44
 
 
 
 
45
  demo = gr.ChatInterface(
46
  respond,
47
  additional_inputs=[