Spaces:

ZoroaStrella
/

RekaFlash

Running on Zero

App Files Files Community

ZoroaStrella committed on 10 days ago

Commit

ce9b3a4

1 Parent(s): cdceb5d

Update to use reka

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +156 -51
requirements.txt +1 -0

README.md CHANGED Viewed

@@ -11,4 +11,4 @@ license: apache-2.0
 short_description: A chat interface to use Reka Flash 3 OSS apache model
 ---
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 short_description: A chat interface to use Reka Flash 3 OSS apache model
 ---
+An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

app.py CHANGED Viewed

@@ -1,64 +1,169 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
     message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
+# Configuration
+# Hugging Face Hub repository id of the model; used for the inference call
+# and for the model link shown in the deployment notice.
+MODEL_NAME = "RekaAI/reka-flash-3"
+# Initial value of the "Max Length" slider.
+DEFAULT_MAX_LENGTH = 1024
+# Initial value of the "Temperature" slider.
+DEFAULT_TEMPERATURE = 0.7
+# System prompt
+# Default text of the editable "System Prompt" textbox; it is interpolated
+# into the "System: ..." section of the prompt sent to the model.
+SYSTEM_PROMPT = """You are Reka Flash-3, a helpful AI assistant created by Reka AI.
+Provide detailed, helpful answers while maintaining safety.
+Format responses clearly using markdown when appropriate."""
+def generate_response(
     message,
+    chat_history,
+    system_prompt,
+    max_length,
     temperature,
     top_p,
+    top_k,
+    repetition_penalty,
+    presence_penalty,
+    frequency_penalty,
+    show_reasoning
 ):
+    """Send one user message to the model and append the reply to the chat.
+
+    Args:
+        message: Text typed by the user.
+        chat_history: List of ``(user, assistant)`` tuples shown in the
+            chatbot; may be ``None`` or empty before the first turn.
+        system_prompt: Text placed in the ``System:`` section of the prompt.
+        max_length: Maximum number of new tokens to generate.
+        temperature: Sampling temperature.
+        top_p: Nucleus-sampling threshold.
+        top_k: Top-k sampling cutoff.
+        repetition_penalty: Repetition penalty forwarded to the endpoint.
+        presence_penalty: Accepted for interface compatibility only;
+            ``InferenceClient.text_generation`` has no such parameter, so it
+            is not forwarded.
+        frequency_penalty: Frequency penalty forwarded to the endpoint.
+        show_reasoning: When true, request generation details and surface
+            any ``reasoning`` entry they contain.
+
+    Returns:
+        A 3-tuple ``(cleared_message, updated_history, reasoning_text)``
+        matching the ``[message, chatbot, reasoning_display]`` outputs.
+    """
+    # Work on a copy; the chatbot value can be None before the first turn.
+    chat_history = list(chat_history or [])
+    # Nothing to send: keep the history as-is instead of calling the API.
+    if not message or not message.strip():
+        return "", chat_history, ""
+    # Format the prompt
+    formatted_prompt = f"System: {system_prompt}\n\nUser: {message}\n\nAssistant:"
+    # Create client
+    client = InferenceClient()
+    # Generate response. The prompt is the only positional argument of
+    # text_generation; the model id must be passed as `model=`.
+    response = client.text_generation(
+        formatted_prompt,
+        model=MODEL_NAME,
+        # Sliders may deliver floats; these two parameters are integers.
+        max_new_tokens=int(max_length),
         temperature=temperature,
         top_p=top_p,
+        top_k=int(top_k),
+        repetition_penalty=repetition_penalty,
+        frequency_penalty=frequency_penalty,
+        details=bool(show_reasoning),
+    )
+    # With details=False the call returns a plain string; with details=True
+    # it returns an object carrying `generated_text` and `details`.
+    reasoning = ""
+    final_answer = response
+    if show_reasoning and hasattr(response, "generated_text"):
+        final_answer = response.generated_text
+        details = getattr(response, "details", None)
+        # `details` can be absent/None; only query it when it is dict-like.
+        if details is not None and hasattr(details, "get"):
+            reasoning = details.get("reasoning", "") or ""
+    # Update chat history
+    chat_history.append((message, final_answer))
+    # `reasoning` is only ever populated when show_reasoning is set.
+    return "", chat_history, reasoning
+# UI Components
+# Builds the Gradio Blocks layout, wires both the "Send" button and the
+# Enter key of the message box to generate_response, and starts the app.
+with gr.Blocks(title="Reka Flash-3 Chat Demo", theme=gr.themes.Soft()) as demo:
+    # Header Section
+    gr.Markdown("""
+    # Reka Flash-3 Chat Interface
+    *Powered by [Reka Core AI](https://www.reka.ai/)*
+    """)
+    # Deployment Notice
+    with gr.Accordion("Important Deployment Notice", open=True):
+        gr.Markdown(f"""
+        **To deploy this model on Hugging Face Spaces:**
+        1. Request access to Reka Flash-3 from [Hugging Face Hub](https://huggingface.co/{MODEL_NAME})
+        2. Ensure you have Hugging Face PRO subscription
+        3. Add your HF token in Space settings
+        4. Set `GPU_SMALL` or higher in Space hardware settings
+        """)
+    # Chat Interface
+    with gr.Row():
+        chatbot = gr.Chatbot(height=500)
+        reasoning_display = gr.Textbox(
+            label="Model Reasoning",
+            interactive=False,
+            visible=True,
+            lines=20,
+            max_lines=20
+        )
+    # Input Section
+    with gr.Row():
+        message = gr.Textbox(
+            label="Your Message",
+            placeholder="Type your message here...",
+            lines=3,
+            max_lines=6
+        )
+        submit_btn = gr.Button("Send", variant="primary")
+    # Parameters
+    with gr.Accordion("Normal Options", open=False):
+        with gr.Row():
+            # step=1: the token budget is an integer parameter.
+            max_length = gr.Slider(128, 4096, value=DEFAULT_MAX_LENGTH, step=1, label="Max Length")
+            temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMPERATURE, label="Temperature")
+    with gr.Accordion("Advanced Options", open=False):
+        with gr.Row():
+            top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
+            # step=1: without an explicit step the slider can yield
+            # fractional values, but top-k is an integer parameter.
+            top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
+            repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, label="Repetition Penalty")
+        with gr.Row():
+            presence_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Presence Penalty")
+            frequency_penalty = gr.Slider(-2.0, 2.0, value=0.0, label="Frequency Penalty")
+    # System Prompt
+    system_prompt = gr.Textbox(
+        label="System Prompt",
+        value=SYSTEM_PROMPT,
+        lines=3
+    )
+    # Debug Options
+    show_reasoning = gr.Checkbox(
+        label="Show Model Reasoning",
+        value=True
+    )
+    # Event Handling
+    # Both triggers share one wiring so they cannot drift apart; the order
+    # of the inputs matches generate_response's parameter order.
+    generation_inputs = [
+        message,
+        chatbot,
+        system_prompt,
+        max_length,
+        temperature,
+        top_p,
+        top_k,
+        repetition_penalty,
+        presence_penalty,
+        frequency_penalty,
+        show_reasoning
+    ]
+    generation_outputs = [message, chatbot, reasoning_display]
+    submit_btn.click(
+        generate_response,
+        inputs=generation_inputs,
+        outputs=generation_outputs
+    )
+    message.submit(
+        generate_response,
+        inputs=generation_inputs,
+        outputs=generation_outputs
+    )
+# Start the app (debug=True keeps the process attached and prints errors)
+demo.launch(debug=True)

requirements.txt CHANGED Viewed

	@@ -1 +1,2 @@

1	huggingface_hub==0.25.2


1	+ gradio>=3.50
2	huggingface_hub==0.25.2