File size: 3,917 Bytes
db83650
 
ba5864f
 
db83650
 
 
 
b9934d6
db83650
 
 
 
 
 
 
 
 
ba5864f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db83650
ba5864f
 
 
 
 
 
 
 
 
 
 
 
 
 
db83650
ba5864f
db83650
ba5864f
 
 
 
 
 
 
 
 
 
 
 
db83650
ba5864f
 
 
 
 
 
 
 
 
 
 
 
db83650
b9934d6
 
 
 
 
db83650
 
 
 
 
 
 
ba5864f
db83650
ba5864f
 
db83650
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import json
from functools import lru_cache

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoModelForCausalLM, AutoTokenizer

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""


# --- Static prompt / tool definitions (built once, not per chat turn) -------

_TASK_INSTRUCTION = """
You are an expert in composing functions. You are given a question and a set of possible functions. 
Based on the question, you will need to make one or more function/tool calls to achieve the purpose. 
If none of the functions can be used, point it out and refuse to answer. 
If the given question lacks the parameters required by the function, also point it out.
""".strip()

# Tool schemas in the OpenAI function-calling format.
GET_WEATHER_API = {
    "name": "get_weather",
    "description": "Get the current weather for a location",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, New York"
            },
            "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The unit of temperature to return"
            }
        },
        "required": ["location"]
    }
}

SEARCH_API = {
    "name": "search",
    "description": "Search for information on the internet",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The search query, e.g. 'latest news on AI'"
            }
        },
        "required": ["query"]
    }
}

OPENAI_FORMAT_TOOLS = [GET_WEATHER_API, SEARCH_API]


def _convert_to_xlam_tool(tools):
    """Flatten OpenAI-format tool schema(s) into the xLAM tool format.

    A dict is reduced to ``{name, description, parameters}`` where
    ``parameters`` is the bare properties mapping (the JSON-schema
    ``type``/``required`` wrapper is dropped); a list is converted
    element-wise; anything else is returned unchanged.
    """
    if isinstance(tools, dict):
        return {
            "name": tools["name"],
            "description": tools["description"],
            "parameters": dict(tools["parameters"].get("properties", {})),
        }
    elif isinstance(tools, list):
        return [_convert_to_xlam_tool(tool) for tool in tools]
    else:
        return tools


@lru_cache(maxsize=1)
def _load_action_gemma():
    """Load the ActionGemma model and tokenizer once per process.

    Previously the 9B model was re-loaded on every chat turn; caching the
    (model, tokenizer) pair makes every call after the first fast.
    """
    model = AutoModelForCausalLM.from_pretrained(
        "KishoreK/ActionGemma-9B",
        load_in_4bit=True,
        device_map="auto",
        use_cache=True,
        low_cpu_mem_usage=True,
    )
    tokenizer = AutoTokenizer.from_pretrained("KishoreK/ActionGemma-9B")
    return model, tokenizer


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    """Generate a tool-call response from ActionGemma for one chat turn.

    Args:
        message: The user's query for this turn.
        history: Prior (user, assistant) turns from Gradio. Currently unused —
            each turn is answered independently.
        system_message: System prompt from the UI. NOTE(review): currently
            ignored in favor of the fixed function-calling instruction, matching
            the original behavior.
        max_tokens: Cap on newly generated tokens (default 512, the previous
            hard-coded value). Defaults are required because the ChatInterface
            below only supplies ``system_message`` as an additional input.
        temperature, top_p: Accepted for interface compatibility; generation is
            greedy (``do_sample=False``), so they have no effect.

    Returns:
        The decoded model completion (expected to contain the tool call(s)).
    """
    # xLAM-style conversation: system instruction, user query, then the
    # available tools serialized as JSON under a dedicated "tools" role.
    messages = [
        {"role": "system", "content": _TASK_INSTRUCTION},
        {"role": "user", "content": message},
        {"role": "tools", "content": json.dumps(_convert_to_xlam_tool(OPENAI_FORMAT_TOOLS))},
    ]

    model, tokenizer = _load_action_gemma()
    # return_tensors="pt" is required: without it apply_chat_template returns a
    # plain list of ids, which model.generate cannot consume. Move the batch to
    # the model's device (device_map="auto" may place it on GPU).
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    outputs = model.generate(
        inputs,
        max_new_tokens=max_tokens,
        do_sample=False,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Strip the prompt tokens so only the newly generated completion is decoded.
    return tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI wired to respond(). Only `system_message` is supplied as an
# additional input — max_tokens / temperature / top_p are not exposed here,
# so respond() must tolerate being called without them.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are an expert in composing functions.", label="System message"),
    ],
    # Example query in Hindi ("Who is the president of America?") to showcase
    # the model's multilingual capability.
    examples=["अमेरिका के राष्ट्रपति कौन है?"],
    description="This is ActionGemma, LAM with multi-lingual capabilities. currently this model is prompted with only 2 tools available : get_weather_api and search_api. Integrations for more api's will be coming soon."
)


# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()