cetusian committed
Commit 5ffb836 · verified · 1 Parent(s): 00b9af3

Update app.py

Files changed (1):
  1. app.py +90 -37
app.py CHANGED
@@ -1,18 +1,18 @@
 import os
 import gradio as gr
-from huggingface_hub import login
-from huggingface_hub import InferenceClient
+from huggingface_hub import login, InferenceClient
 import spaces
 
-# Retrieve API key and authenticate
+# Authenticate with Hugging Face API key
 api_key = os.getenv("LLAMA")
 login(api_key)
 
-# Initialize InferenceClient for the Llama model
-client = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")
+# Initialize InferenceClients for multiple models
+client1 = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")
+client2 = InferenceClient("bigscience/bloom")
 
 @spaces.GPU
-def respond(
+def compare_models(
     message,
     history: list[dict],
     system_message,
@@ -22,17 +22,26 @@ def respond(
 ):
     # Start with the system message
     messages = [{"role": "system", "content": system_message}]
-
-    # Add the conversation history
-    messages += history
-
-    # Add the latest user message
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    # Send the conversation to the model and stream the response
-    for message in client.chat_completion(
+    messages += history  # Add conversation history
+    messages.append({"role": "user", "content": message})  # Add user message
+
+    # Fetch responses from both models
+    response1 = ""
+    response2 = ""
+
+    # Stream responses for Model 1
+    for message in client1.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
+        response1 += token
+
+    # Stream responses for Model 2
+    for message in client2.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
@@ -40,29 +49,73 @@ def respond(
         top_p=top_p,
     ):
         token = message.choices[0].delta.content
-        response += token
-        yield response
-
-# Initialize the Gradio ChatInterface with the new format
-demo = gr.ChatInterface(
-    respond,
-    type="messages",  # Use the OpenAI-style format
-    additional_inputs=[
-        gr.Textbox(
-            value="You are a helpful Customer Support assistant that specializes in the low-code software company: 'Plant an App' and tech-related topics.",
-            label="System message"
-        ),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)"
-        ),
-    ],
-)
+        response2 += token
+
+    # Return responses side-by-side
+    return response1, response2
+
+def handle_vote(vote, current_votes):
+    """Handle user votes."""
+    current_votes[vote] += 1
+    return f"Model 1: {current_votes['model1']} votes | Model 2: {current_votes['model2']} votes"
+
+# Initialize voting state
+votes = {"model1": 0, "model2": 0}
+
+# Create Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# AI Model Comparison Tool")
+    with gr.Row():
+        system_message = gr.Textbox(
+            value="You are a helpful assistant specializing in tech-related topics.",
+            label="System message",
+        )
+        max_tokens = gr.Slider(
+            minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
+        )
+        temperature = gr.Slider(
+            minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
+        )
+        top_p = gr.Slider(
+            minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"
+        )
+
+    with gr.Row():
+        message = gr.Textbox(label="Enter your message")
+
+    compare_btn = gr.Button("Compare Models")
+
+    with gr.Row():
+        response1 = gr.Textbox(label="Response from Model 1")
+        response2 = gr.Textbox(label="Response from Model 2")
+
+    with gr.Row():
+        vote_model1 = gr.Button("Vote for Model 1")
+        vote_model2 = gr.Button("Vote for Model 2")
+
+    vote_status = gr.Textbox(
+        value=f"Model 1: {votes['model1']} votes | Model 2: {votes['model2']} votes",
+        label="Voting Results",
+    )
+
+    # Link components
+    compare_btn.click(
+        compare_models,
+        inputs=[message, [], system_message, max_tokens, temperature, top_p],
+        outputs=[response1, response2],
+    )
+
+    vote_model1.click(
+        handle_vote,
+        inputs=["model1", votes],
+        outputs=vote_status,
+    )
+
+    vote_model2.click(
+        handle_vote,
+        inputs=["model2", votes],
+        outputs=vote_status,
+    )
 
 if __name__ == "__main__":
     demo.launch()
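
A note on the new streaming loops: with stream=True, huggingface_hub's chat_completion yields chunks whose choices[0].delta.content can be None (on the final chunk in particular), so the unguarded `response1 += token` can raise a TypeError mid-stream; the loop variable is also named `message`, shadowing the user's message argument. Below is a minimal sketch of a safer accumulation, assuming the same LLAMA token, model id, and sampling values as the commit; it is not the code the author shipped.

    import os
    from huggingface_hub import login, InferenceClient

    login(os.getenv("LLAMA"))
    client1 = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]

    response1 = ""
    # Name the loop variable `chunk` so it does not shadow the user's message.
    for chunk in client1.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        token = chunk.choices[0].delta.content
        if token:  # delta.content can be None, e.g. on the final chunk
            response1 += token

The same guard applies to the client2 loop; and since compare_models returns only after both loops finish, the two models run sequentially rather than concurrently.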
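
Separately, the new event wiring passes raw Python values through inputs=: a literal [] for the history, the strings "model1"/"model2", and the module-level votes dict. Gradio's inputs= accepts components only, so these .click() calls will fail at runtime, and a module-level dict would in any case share one tally across all visitors. A minimal sketch of one way to rewire the voting, using gr.State for per-session state and functools.partial to identify the button; the reworked handle_vote signature here is my variation, not the committed one.

    import functools
    import gradio as gr

    def handle_vote(model_key, current_votes):
        # current_votes arrives from, and is written back to, a gr.State
        # component, so each browser session keeps its own tally.
        current_votes[model_key] += 1
        status = (f"Model 1: {current_votes['model1']} votes | "
                  f"Model 2: {current_votes['model2']} votes")
        return current_votes, status

    with gr.Blocks() as demo:
        votes = gr.State({"model1": 0, "model2": 0})  # per-session vote tally
        history = gr.State([])  # would replace the raw [] in compare_btn.click

        vote_status = gr.Textbox(label="Voting Results")
        vote_model1 = gr.Button("Vote for Model 1")
        vote_model2 = gr.Button("Vote for Model 2")

        # functools.partial bakes the button identity into the callback
        # instead of passing the string "model1" through inputs=.
        vote_model1.click(
            functools.partial(handle_vote, "model1"),
            inputs=[votes],
            outputs=[votes, vote_status],
        )
        vote_model2.click(
            functools.partial(handle_vote, "model2"),
            inputs=[votes],
            outputs=[votes, vote_status],
        )

    if __name__ == "__main__":
        demo.launch()

The same pattern would fix compare_btn.click: pass the history State in place of the raw [], and return the updated history as an extra output so the conversation persists across turns.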