# Inference
import gradio as gr
from huggingface_hub import InferenceClient

model_text = "google/gemma-2-27b-it"
model_vision = "google/paligemma2-3b-pt-224"

client = InferenceClient()

# NOTE: a "System Prompt" textbox and a matching system message were sketched
# for both tabs but are disabled; neither callback takes a system prompt.


def _stream_completion(model, messages, max_tokens, temperature, top_p):
    """Stream a chat completion and yield the reply accumulated so far.

    Args:
        model: Model id passed to the inference client.
        messages: Chat messages sent as the request's ``messages``.
        max_tokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The concatenation of all content deltas received so far, once per
        streamed chunk that carries a choice.
    """
    stream = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )
    reply = ""
    for chunk in stream:
        # Skip chunks without choices instead of failing on choices[0].
        if not chunk.choices:
            continue
        # A delta may carry no content; treat that as an empty string.
        reply += chunk.choices[0].delta.content or ""
        yield reply


def fn_text(
    prompt,
    history,
    max_tokens,
    temperature,
    top_p,
):
    """Chat callback for the text tab.

    Args:
        prompt: The new user message.
        history: Previous turns as dicts with "role" and "content" keys
            (the interface is created with ``type="messages"``).
        max_tokens: Value of the "Max Tokens" slider.
        temperature: Value of the "Temperature" slider.
        top_p: Value of the "Top-P" slider.

    Yields:
        The assistant reply accumulated so far.
    """
    # Build a fresh list rather than appending to `history`: the list is
    # owned by the caller, and only role/content should reach the API
    # (history entries may carry extra keys added by the UI layer).
    messages = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in (history or [])
    ]
    messages.append({"role": "user", "content": prompt})
    yield from _stream_completion(
        model_text, messages, max_tokens, temperature, top_p
    )


app_text = gr.ChatInterface(
    fn=fn_text,
    type="messages",
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title="Google Gemma",
    description=model_text,
)


def fn_vision(
    prompt,
    image_url,
    max_tokens,
    temperature,
    top_p,
):
    """Callback for the vision tab: a single-turn prompt with an optional image.

    Args:
        prompt: Text of the user message.
        image_url: URL of an image to attach; skipped when empty.
        max_tokens: Value of the "Max Tokens" slider.
        temperature: Value of the "Temperature" slider.
        top_p: Value of the "Top-P" slider.

    Yields:
        The model reply accumulated so far.
    """
    content = [{"type": "text", "text": prompt}]
    if image_url:
        content.append({"type": "image_url", "image_url": {"url": image_url}})
    messages = [{"role": "user", "content": content}]
    yield from _stream_completion(
        model_vision, messages, max_tokens, temperature, top_p
    )


app_vision = gr.Interface(
    fn=fn_vision,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Image URL"),
    ],
    outputs=[
        gr.Textbox(label="Output"),
    ],
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title="Google Gemma",
    description=model_vision,
)

# NOTE: launch() is chained here, so the server starts as soon as this module
# is executed and `app` is bound to whatever launch() returns, not to the
# TabbedInterface itself. An `if __name__ == "__main__":` guard was sketched
# but is not in use.
app = gr.TabbedInterface(
    [app_text, app_vision],
    ["Text", "Vision"],
).launch()