sasan committed on
Commit 962f893
1 Parent(s): bd669ec

chore: Add code interpreter skill and update vehicle status template

.vscode/launch.json ADDED
@@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "RUN KITT",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "main.py",
+            "console": "integratedTerminal"
+        }
+    ]
+}
kitt/core/model.py CHANGED
@@ -6,6 +6,7 @@ from langchain.memory import ChatMessageHistory
 from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
 from langchain_core.utils.function_calling import convert_to_openai_function
 import ollama
+from ollama import Client
 from pydantic import BaseModel
 from loguru import logger
 
@@ -27,12 +28,10 @@ class FunctionCall(BaseModel):
 
 
 schema_json = json.loads(FunctionCall.schema_json())
-HRMS_SYSTEM_PROMPT = """<|begin_of_text|>
-<|im_start|>system
+HRMS_SYSTEM_PROMPT = """<|im_start|>system
 You are a function calling AI agent with self-recursion.
 You can call only one function at a time and analyse data you get from function response.
 You are provided with function signatures within <tools></tools> XML tags.
-{car_status}
 
 You may use agentic frameworks for reasoning and planning to help with user query.
 Please call a function and wait for function results to be provided to you in the next iteration.
@@ -67,8 +66,14 @@ Assistant:
 {{"arguments": {{"search_query": "Spa"}}, "name": "search_points_of_interests"}}
 </tool_call>
 
-When asked for the weather or points of interest, use the appropriate tool with the current location of the car. Unless the user provides a location, then use that location.
+Example 3:
+User: How long will it take to get to the destination?
+Assistant:
+<tool_call>
+{{"arguments": {{"destination": ""}}, "name": "calculate_route"}}
 
+When asked for the weather or points of interest, use the appropriate tool with the current location of the car. Unless the user provides a location, then use that location.
+Always assume user wants to travel by car.
 
 Use the following pydantic model json schema for each tool call you will make:
 {schema}
@@ -145,6 +150,8 @@ def parse_tool_calls(text):
     pattern = r"<tool_call>\s*(\{.*?\})\s*</tool_call>"
 
     if not text.startswith("<tool_call>"):
+        if "<tool_call>" in text:
+            raise ValueError("<text_and_tool_call>")
         return [], []
 
     matches = re.findall(pattern, text, re.DOTALL)
@@ -164,12 +171,22 @@ def parse_tool_calls(text):
 def process_response(user_query, res, history, tools, depth):
     """Returns True if the response contains tool calls, False otherwise."""
     logger.debug(f"Processing response: {res}")
-    tool_calls, errors = parse_tool_calls(res)
+    tool_results = f"Agent iteration {depth} to assist with user query: {user_query}\n"
+    tool_call_id = uuid.uuid4().hex
+    try:
+        tool_calls, errors = parse_tool_calls(res)
+    except ValueError as e:
+        if "<text_and_tool_call>" in str(e):
+            tool_results += f"A mix of text and tool_call was found, you must either answer the query in a short sentence or use tool_call not both. Try again, this time only using tool_call."
+            history.add_message(
+                ToolMessage(content=tool_results, tool_call_id=tool_call_id)
+            )
+            return True, [], []
     # TODO: Handle errors
     if not tool_calls:
         return False, tool_calls, errors
     # tool_results = ""
-    tool_results = f"Agent iteration {depth} to assist with user query: {user_query}\n"
+
     for tool_call in tool_calls:
         # TODO: Extra Validation
         # Call the function
@@ -185,12 +202,11 @@ def process_response(user_query, res, history, tools, depth):
 
     tool_results = tool_results.strip()
     print(f"Tool results: {tool_results}")
-    tool_call_id = uuid.uuid4().hex
     history.add_message(ToolMessage(content=tool_results, tool_call_id=tool_call_id))
     return True, tool_calls, errors
 
 
-def run_inference_step(history, tools, schema_json, dry_run=False):
+def run_inference_step(depth, history, tools, schema_json, dry_run=False):
     # If we decide to call a function, we need to generate the prompt for the model
     # based on the history of the conversation so far.
     # not break the loop
@@ -199,17 +215,26 @@ def run_inference_step(history, tools, schema_json, dry_run=False):
     print(f"Prompt is:{prompt + AI_PREAMBLE}\n------------------\n")
 
     data = {
-        "prompt": prompt + AI_PREAMBLE,
+        "prompt": prompt
+        + "\nThis is the first turn and you don't have <tool_results> to analyze yet"
+        + AI_PREAMBLE,
         # "streaming": False,
         # "model": "smangrul/llama-3-8b-instruct-function-calling",
         # "model": "elvee/hermes-2-pro-llama-3:8b-Q5_K_M",
         # "model": "NousResearch/Hermes-2-Pro-Llama-3-8B",
-        "model": "interstellarninja/hermes-2-pro-llama-3-8b",
+        # "model": "interstellarninja/hermes-2-pro-llama-3-8b",
+        "model": "dolphin-llama3:8b",
+        # "model": "dolphin-llama3:70b",
        "raw": True,
         "options": {
             "temperature": 0.8,
             # "max_tokens": 1500,
             "num_predict": 1500,
+            "mirostat": 1,
+            # "mirostat_tau": 2,
+            "repeat_penalty": 1.5,
+            "top_k": 25,
+            "top_p": 0.5,
             # "num_predict": 1500,
             # "max_tokens": 1500,
         },
@@ -218,8 +243,10 @@ def run_inference_step(history, tools, schema_json, dry_run=False):
     if dry_run:
         print(prompt + AI_PREAMBLE)
         return "Didn't really run it."
-
-    out = ollama.generate(**data)
+
+    client = Client(host='http://localhost:11444')
+    # out = ollama.generate(**data)
+    out = client.generate(**data)
     logger.debug(f"Response from model: {out}")
     res = out["response"]
 
@@ -227,18 +254,20 @@
 
 
 def process_query(user_query: str, history: ChatMessageHistory, tools):
-    history.add_message(HumanMessage(content=user_query))
+    # Add vehicle status to the history
+    user_query_status = (
+        f"Given that:\n{vehicle_status()[0]}\nAnswer the following:\n{user_query}"
+    )
+    history.add_message(HumanMessage(content=user_query_status))
     for depth in range(10):
-        out = run_inference_step(history, tools, schema_json)
+        out = run_inference_step(depth, history, tools, schema_json)
         print(f"Inference step result:\n{out}\n------------------\n")
         history.add_message(AIMessage(content=out))
         to_continue, tool_calls, errors = process_response(
             user_query, out, history, tools, depth
         )
         if errors:
-            history.add_message(
-                AIMessage(content=f"Errors in tool calls: {errors}")
-            )
+            history.add_message(AIMessage(content=f"Errors in tool calls: {errors}"))
 
         if not to_continue:
             print(f"This is the answer, no more iterations: {out}")
kitt/skills/__init__.py CHANGED
@@ -6,6 +6,7 @@ from .weather import get_weather_current_location, get_weather, get_forecast
 from .routing import find_route
 from .poi import search_points_of_interests, search_along_route_w_coordinates
 from .vehicle import vehicle_status
+from .interpreter import code_interpreter
 
 
 
kitt/skills/interpreter.py ADDED
@@ -0,0 +1,52 @@
+import inspect
+
+# From https://github.com/NousResearch/Hermes-Function-Calling
+
+def code_interpreter(code_markdown: str) -> dict | str:
+    """
+    Execute the provided Python code string on the terminal using exec.
+
+    The string should contain valid, executable and pure Python code in markdown syntax.
+    Code should also import any required Python packages.
+
+    Args:
+        code_markdown (str): The Python code with markdown syntax to be executed.
+            For example: ```python\n<code-string>\n```
+
+    Returns:
+        dict | str: A dictionary containing variables declared and values returned by function calls,
+            or an error message if an exception occurred.
+
+    Note:
+        Use this function with caution, as executing arbitrary code can pose security risks. Use it only for numerical calculations.
+    """
+    try:
+        # Extracting code from Markdown code block
+        code_lines = code_markdown.split('\n')[1:-1]
+        code_without_markdown = '\n'.join(code_lines)
+
+        # Create a new namespace for code execution
+        exec_namespace = {}
+
+        # Execute the code in the new namespace
+        exec(code_without_markdown, exec_namespace)
+
+        # Collect variables and function call results
+        result_dict = {}
+        for name, value in exec_namespace.items():
+            if callable(value):
+                try:
+                    result_dict[name] = value()
+                except TypeError:
+                    # If the function requires arguments, attempt to call it with arguments from the namespace
+                    arg_names = inspect.getfullargspec(value).args
+                    args = {arg_name: exec_namespace.get(arg_name) for arg_name in arg_names}
+                    result_dict[name] = value(**args)
+            elif not name.startswith('_'):  # Exclude variables starting with '_'
+                result_dict[name] = value
+
+        return result_dict
+
+    except Exception as e:
+        error_message = f"An error occurred: {e}"
+        return error_message
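
A quick way to exercise the new skill directly (a usage sketch; the expected result follows from the code above, since top-level names that don't start with an underscore are collected):

from kitt.skills import code_interpreter

snippet = """```python
x = 21
y = x * 2
```"""

print(code_interpreter(snippet))  # {'x': 21, 'y': 42}
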
kitt/skills/routing.py CHANGED
@@ -90,10 +90,41 @@ def find_route_tomtom(
 }, response
 
 
-def find_route(destination=""):
-    """This function finds a route to a destination and returns the distance and the estimated time to go to a specific destination\
-    from the current location.
-    :param destination (string): Required. The destination
+def find_route_a_to_b(origin="", destination=""):
+    """Get a route between origin and destination.
+
+    Args:
+        origin (string): Optional. The origin name.
+        destination (string): Optional. The destination name.
+    """
+    if not destination:
+        destination = vehicle.destination
+    lat_dest, lon_dest = find_coordinates(destination)
+    print(f"lat_dest: {lat_dest}, lon_dest: {lon_dest}")
+
+    if not origin:
+        # Extract the latitude and longitude of the vehicle
+        vehicle_coordinates = getattr(vehicle, "location_coordinates")
+        lat_depart, lon_depart = vehicle_coordinates
+    else:
+        lat_depart, lon_depart = find_coordinates(origin)
+    print(f"lat_depart: {lat_depart}, lon_depart: {lon_depart}")
+
+    date = getattr(vehicle, "date")
+    time = getattr(vehicle, "time")
+    departure_time = f"{date}T{time}"
+
+    trip_info, raw_response = find_route_tomtom(
+        lat_depart, lon_depart, lat_dest, lon_dest, departure_time
+    )
+    return _format_tomtom_trip_info(trip_info, destination)
+
+
+def find_route(destination):
+    """Get a route to a destination from the current location of the vehicle.
+
+    Args:
+        destination (string): Optional. The destination name.
     """
     if not destination:
         destination = vehicle.destination
@@ -114,7 +145,13 @@ def find_route(destination=""):
     trip_info, raw_response = find_route_tomtom(
         lat_depart, lon_depart, lat_dest, lon_dest, departure_time
     )
+    return _format_tomtom_trip_info(trip_info, destination)
+
+
+# raw_response["routes"][0]["legs"][0]["points"]
+
 
+def _format_tomtom_trip_info(trip_info, destination="destination"):
     distance, duration, arrival_time = (
         trip_info["distance_m"],
         trip_info["duration_s"],
@@ -138,5 +175,4 @@
     arrival_hour_display = arrival_time.strftime("%H:%M")
 
     # return the distance and time
-    return f"The route to {destination} is {distance_km:.2f} km which takes {time_display}. Leaving now, the arrival time is estimated at {arrival_hour_display}."
-    # raw_response["routes"][0]["legs"][0]["points"]
+    return f"The route to {destination} is {distance_km:.2f} km which takes {time_display}. Leaving now, the arrival time is estimated at {arrival_hour_display}."
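The point of this refactor is that find_route and find_route_a_to_b now share one formatter. The helper's middle section (the km and duration conversion) is unchanged and not shown in the diff, so this standalone sketch fills that gap with assumed conversions; only the input fields and the final sentence are taken from the diff:

from datetime import datetime, timedelta

def _format_trip_info_sketch(trip_info, destination="destination"):
    # Same inputs as _format_tomtom_trip_info: metres, seconds, arrival datetime.
    distance_km = trip_info["distance_m"] / 1000
    duration = timedelta(seconds=trip_info["duration_s"])
    hours, remainder = divmod(duration.seconds, 3600)
    minutes = remainder // 60
    time_display = f"{hours} hours {minutes} minutes" if hours else f"{minutes} minutes"
    arrival_hour_display = trip_info["arrival_time"].strftime("%H:%M")
    return (
        f"The route to {destination} is {distance_km:.2f} km which takes {time_display}. "
        f"Leaving now, the arrival time is estimated at {arrival_hour_display}."
    )

print(_format_trip_info_sketch(
    {"distance_m": 12500, "duration_s": 1080, "arrival_time": datetime(2024, 5, 20, 14, 45)},
    destination="Spa",
))
# The route to Spa is 12.50 km which takes 18 minutes. ...
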
kitt/skills/vehicle.py CHANGED
@@ -1,13 +1,9 @@
 from .common import vehicle
 
 
-STATUS_TEMPLATE = """
-The current location is:{location}
-The current Geo coordinates: {lat}, {lon}
-The current time: {time}
-The current date: {date}
-The current destination is: {destination}
-"""
+STATUS_TEMPLATE = """The current location is: {location} ({lat}, {lon})
+The current date and time: {date} {time}
+The current destination is: {destination}"""
 
 
 def vehicle_status() -> tuple[str, dict[str, str]]:
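
Rendered with sample values, the reworked template now reads as three lines instead of five (the values below are illustrative only):

STATUS_TEMPLATE = """The current location is: {location} ({lat}, {lon})
The current date and time: {date} {time}
The current destination is: {destination}"""

print(
    STATUS_TEMPLATE.format(
        location="Luxembourg Gare",
        lat=49.5999,
        lon=6.1333,
        date="2024-05-20",
        time="14:45",
        destination="Kirchberg Campus",
    )
)
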
main.py CHANGED
@@ -26,6 +26,7 @@ from kitt.skills import (
     do_anything_else,
     date_time_info,
     get_weather_current_location,
+    code_interpreter,
 )
 from kitt.skills import extract_func_args
 from kitt.core import voice_options, tts_gradio
@@ -124,6 +125,7 @@ tools = [
     StructuredTool.from_function(search_along_route),
     StructuredTool.from_function(date_time_info),
     StructuredTool.from_function(get_weather_current_location),
+    StructuredTool.from_function(code_interpreter),
     # StructuredTool.from_function(do_anything_else),
 ]
 
@@ -201,6 +203,8 @@ def run_model(query, voice_character, state):
         return run_nexusraven_model(query, voice_character)
     elif model == "llama3":
         return run_llama3_model(query, voice_character)
+    return "Error running model", None
+
 
 
 def calculate_route_gradio(origin, destination):
@@ -259,12 +263,19 @@ def save_and_transcribe_audio(audio):
         y = y.astype(np.float32)
         y /= np.max(np.abs(y))
         text = transcriber({"sampling_rate": sr, "raw": y})["text"]
+        gr.Info(f"Transcribed text is: {text}\nProcessing the input...")
+
     except Exception as e:
        print(f"Error: {e}")
-        return "Error transcribing audio"
+        return "Error transcribing audio."
     return text
 
 
+def save_and_transcribe_run_model(audio, voice_character, state):
+    text = save_and_transcribe_audio(audio)
+    out_text, out_voice = run_model(text, voice_character, state)
+    return text, out_text, out_voice
+
 # to be able to use the microphone on chrome, you will have to go to chrome://flags/#unsafely-treat-insecure-origin-as-secure and enter http://10.186.115.21:7860/
 # in "Insecure origins treated as secure", enable it and relaunch chrome
 
@@ -337,6 +348,18 @@ def create_demo(tts_server: bool = False, model="llama3", tts=True):
     input_text = gr.Textbox(
         value="How is the weather?", label="Input text", interactive=True
     )
+    with gr.Accordion("Debug"):
+        input_audio_debug = gr.Audio(
+            type="numpy",
+            sources=["microphone"],
+            label="Input audio",
+            elem_id="input_audio",
+        )
+        input_text_debug = gr.Textbox(
+            value="How is the weather?",
+            label="Input text",
+            interactive=True,
+        )
     vehicle_status = gr.JSON(
         value=vehicle.model_dump_json(), label="Vehicle status"
     )
@@ -370,6 +393,11 @@ def create_demo(tts_server: bool = False, model="llama3", tts=True):
         inputs=[input_text, voice_character, state],
         outputs=[output_text, output_audio],
     )
+    input_text_debug.submit(
+        fn=run_model,
+        inputs=[input_text, voice_character, state],
+        outputs=[output_text, output_audio],
+    )
 
     # Set the vehicle status based on the trip progress
     trip_progress.release(
@@ -380,7 +408,10 @@
 
     # Save and transcribe the audio
     input_audio.stop_recording(
-        fn=save_and_transcribe_audio, inputs=[input_audio], outputs=[input_text]
+        fn=save_and_transcribe_run_model, inputs=[input_audio, voice_character, state], outputs=[input_text, output_text, output_audio]
+    )
+    input_audio_debug.stop_recording(
+        fn=save_and_transcribe_audio, inputs=[input_audio_debug], outputs=[input_text_debug]
     )
 
     # Clear the history
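
With this wiring, stopping a recording on the main audio input runs transcription and the model in a single event, while the new debug widgets keep the transcription-only path. A minimal Gradio sketch of the same pattern (handler bodies and component names here are placeholders, not the app's):

import gradio as gr

def transcribe(audio):
    # placeholder for save_and_transcribe_audio
    return "how is the weather?"

def transcribe_and_run(audio):
    # same shape as save_and_transcribe_run_model: one event, several outputs
    text = transcribe(audio)
    answer = f"model answer for: {text}"
    return text, answer

with gr.Blocks() as demo:
    mic = gr.Audio(sources=["microphone"], type="numpy", label="Input audio")
    text_box = gr.Textbox(label="Input text")
    answer_box = gr.Textbox(label="Output text")
    # stop_recording fans the transcript out to the textbox and the model answer
    mic.stop_recording(fn=transcribe_and_run, inputs=[mic], outputs=[text_box, answer_box])

demo.launch()
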