Tonic committed
Commit bcbfe89
1 Parent(s): 120ef71

Update app.py

Files changed (1): app.py (+62, -10)
app.py CHANGED
@@ -1,10 +1,68 @@
 import gradio as gr
 from transformers import AutoTokenizer
+import gemma
 from gemma.modeling_gemma import GemmaForCausalLM
 import torch
 import time
 
-# Assuming the GemmaForCausalLM and the specific tokenizer are correctly installed and imported
+title = "Tonic's 🐙🐙Octopus"
+description = "Octopus-V2-2B, an advanced open-source language model with 2 billion parameters, represents Nexa AI's research breakthrough in the application of large language models (LLMs) for function calling, specifically tailored for Android APIs. Unlike Retrieval-Augmented Generation (RAG) methods, which require detailed descriptions of potential function arguments—sometimes needing up to tens of thousands of input tokens—Octopus-V2-2B introduces a unique functional token strategy for both its training and inference stages. This approach not only allows it to achieve performance levels comparable to GPT-4 but also significantly enhances its inference speed beyond that of RAG-based methods, making it especially beneficial for edge computing devices."
+
+
+# From the NexusRaven-V2 notebook: https://github.com/nexusflowai/NexusRaven-V2/blob/master/How-To-Prompt.ipynb
+example1 = '''def get_weather_data(coordinates):
+    """
+    Fetches weather data from the Open-Meteo API for the given latitude and longitude.
+
+    Args:
+    coordinates (tuple): The latitude and longitude of the location.
+
+    Returns:
+    float: The current temperature in the coordinates you've asked for
+    """
+
+def get_coordinates_from_city(city_name):
+    """
+    Fetches the latitude and longitude of a given city name using the Maps.co Geocoding API.
+
+    Args:
+    city_name (str): The name of the city.
+
+    Returns:
+    tuple: The latitude and longitude of the city.
+    """
+What's the weather like in Seattle right now?
+'''
+
+example2 = '''Function:
+def add_edge(u, v):
+    """
+    Adds an edge between node u and node v in the graph. Make sure to create a graph first by calling create_new_graph!
+
+    Args:
+    u (str): Node name as string
+    v (str): Node name as string
+    """
+
+Function:
+def is_two_nodes_connected(u, v):
+    """
+    Answers if two nodes are connected.
+    """
+
+Emma is friends with Bob and Charlie, and Charlie is friends with Erik, and Erik is friends with Brian. Can you represent all of these relationships as a graph and answer if Emma is friends with Erik?
+'''
+
+EXAMPLES = [
+    [example1],
+    [example2]
+]
+
+model_id = "NexaAIDev/Octopus-v2"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = GemmaForCausalLM.from_pretrained(
+    model_id, torch_dtype=torch.bfloat16, device_map="auto"
+)
 
 def inference(input_text):
     start_time = time.time()
@@ -19,13 +77,6 @@ def inference(input_text):
     end_time = time.time()
     return {"output": res, "latency": f"{end_time - start_time:.2f} seconds"}
 
-# Initialize the tokenizer and model
-model_id = "NexaAIDev/Octopus-v2"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = GemmaForCausalLM.from_pretrained(
-    model_id, torch_dtype=torch.bfloat16, device_map="auto"
-)
-
 def gradio_interface(input_text):
     nexa_query = f"Below is the query from the users, please call the correct function and generate the parameters to call the function.\n\nQuery: {input_text} \n\nResponse:"
     result = inference(nexa_query)
@@ -35,8 +86,9 @@ iface = gr.Interface(
     fn=gradio_interface,
     inputs=gr.inputs.Textbox(lines=2, placeholder="Enter your query here..."),
     outputs=[gr.outputs.Textbox(label="Output"), gr.outputs.Textbox(label="Latency")],
-    title="Gemma Model Inference",
-    description="This application uses the Gemma model for generating responses based on the input query."
+    title=title,
+    description=description,
+    examples=EXAMPLES
 )
 
 if __name__ == "__main__":
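The hunks above elide the body of inference (old lines 11-18, new lines 69-76). For orientation, here is a minimal sketch of what that body plausibly looks like, assuming the standard transformers tokenize-and-generate API in the style of the NexaAIDev/Octopus-v2 model card; the elided code in the actual commit may differ:

# Hedged sketch of the elided inference() body: tokenize the prompt, generate
# greedily, strip the prompt tokens, and decode. Assumes the `tokenizer` and
# `model` globals defined earlier in app.py.
def inference(input_text):
    start_time = time.time()
    input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
    input_length = input_ids["input_ids"].shape[1]
    outputs = model.generate(
        input_ids=input_ids["input_ids"],
        max_length=1024,
        do_sample=False,
    )
    # Keep only the newly generated tokens, not the echoed prompt.
    generated_sequence = outputs[:, input_length:].tolist()
    res = tokenizer.decode(generated_sequence[0])
    end_time = time.time()
    return {"output": res, "latency": f"{end_time - start_time:.2f} seconds"}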
 
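One portability caveat: the interface still uses gr.inputs.Textbox and gr.outputs.Textbox, a Gradio 2.x-era namespace that was deprecated in Gradio 3 and removed in Gradio 4, so the app as committed will not build on a current Gradio install. A minimal sketch of the equivalent construction for Gradio 4.x, reusing the title, description, and EXAMPLES objects this commit introduces; the iface.launch() call is an assumption, since the diff truncates after the if __name__ guard:

# Sketch for Gradio >= 4.x: components are passed directly from the top-level
# gr namespace; the gr.inputs / gr.outputs modules no longer exist.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs=[gr.Textbox(label="Output"), gr.Textbox(label="Latency")],
    title=title,
    description=description,
    examples=EXAMPLES,
)

if __name__ == "__main__":
    iface.launch()  # assumed entry point; the diff cuts off before this line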