Anand Sampat
simplify launch
1d99a1c
import io
import time
import gradio as gr
import base64
import openai
from openai import OpenAI
def describe_food_in_image(image, api_key, vision_model, progress=gr.Progress()):
    """Ask a SambaNova-hosted vision model to describe the food in an image.

    Args:
        image: Image object exposing ``save(fp, format=...)``; the UI in this
            file supplies a PIL image (``gr.Image(type="pil")``).
        api_key: SambaNova API key used to authenticate the request.
        vision_model: Name of the vision model to query.
        progress: Gradio progress tracker used to report status updates.

    Returns:
        A ``(description, elapsed_seconds)`` tuple: the model's text reply and
        the wall-clock time spent in this function.

    Raises:
        gr.Error: If no image was supplied or the API request fails.
    """
    # Without this guard a missing image fails below with an opaque
    # AttributeError on ``image.save``.
    if image is None:
        raise gr.Error("Error: no image was provided")

    start_time = time.time()
    progress(0, desc="Initializing image analysis...")
    sn_dev_client = OpenAI(
        base_url="https://api.sambanova.ai/v1",
        api_key=api_key,
    )

    progress(0.2, desc="Converting image to base64...")
    # Serialize the image to PNG in memory and base64-encode it so it can be
    # embedded in the request as a data URL.
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')

    progress(0.3, desc="Preparing API request...")
    # Single user turn carrying both the question and the image.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What type of food is shown in this image? Please provide a brief description."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}"
                    }
                }
            ]
        }
    ]

    progress(0.4, desc="Analyzing image...")
    try:
        response = sn_dev_client.chat.completions.create(
            model=vision_model,
            messages=messages,
            max_tokens=150,
        )
        progress(1.0, desc="Image analysis complete")
        output = response.choices[0].message.content
        thinking_time = time.time() - start_time
        return output, thinking_time
    except Exception as e:
        # Surface the failure through Gradio's error type, keeping the
        # original exception chained for debugging.
        error_message = f"Error: {str(e)}"
        raise gr.Error(error_message) from e
def desc_to_ingredients_recipe(food_description, api_key, language_model, progress=gr.Progress()):
    """Turn a textual food description into a Markdown recipe via a language model.

    Args:
        food_description: Text describing the dish (in this file, the output
            of ``describe_food_in_image``).
        api_key: SambaNova API key used to authenticate the request.
        language_model: Name of the language model to query.
        progress: Gradio progress tracker used to report status updates.

    Returns:
        A ``(recipe_markdown, elapsed_seconds)`` tuple: the model's reply and
        the wall-clock time spent in this function.

    Raises:
        gr.Error: If the description is empty or the API request fails.
    """
    # An empty/None description would otherwise be interpolated into the
    # prompt verbatim (e.g. the literal text "None") and sent to the model.
    if food_description is None or not str(food_description).strip():
        raise gr.Error("Error: no food description available to generate a recipe")

    # Run the code through the SN API (and profile)
    start_time = time.time()
    progress(0.6, desc="Initializing recipe generation...")
    # Sambanova API base URL
    SN_API_BASE = "https://api.sambanova.ai/v1"
    sn_client = openai.OpenAI(api_key=api_key, base_url=SN_API_BASE)

    progress(0.7, desc="Preparing recipe prompt...")
    # Maximum number of reasoning steps the system prompt allows the model.
    budget = 10 # between 1-100
    SYSTEM_PROMPT = f"""
You are a helpful assistant in normal conversation.
When given a problem to solve, you are an expert problem-solving assistant.
Your task is to provide a detailed, step-by-step solution to a given question with the following budget of {budget}.
Follow these instructions carefully:
1. Read the given question carefully
2. Generate a detailed, logical step-by-step solution.
3. Each step should be formatted as in the example below.
4. You are allowed to use at most {budget} steps (starting budget),
keep track of it by counting down from the budget.
STOP GENERATING MORE STEPS when hitting 0.
5. Do a self-reflection when you are unsure about how to proceed,
based on the self-reflection and reward, decides whether you need to return
to the previous steps.
6. After completing the solution steps, reorganize and synthesize the steps
into the final answer as in the format given below and do not mention the budget or steps you took.
7. Provide a critical, honest and subjective self-evaluation of your reasoning
process at the end.
Output should be in Markdown format, for example if generating a recipe it would look like this:
# Recipe
Hi there! Today you'll learn about how to cook a delicious Thai delicacy, Pad Thai! For this you'll need
## Ingredients
For the sauce:
* 1/2 cup tamarind paste
* 1/4 cup fish sauce
* 1/4 cup palm sugar (or brown sugar)
* 2 tablespoons soy sauce
* 2 tablespoons rice vinegar
* 1 tablespoon vegetable oil
* 2 cloves garlic, minced
* 1 tablespoon grated fresh ginger
* 1/4 teaspoon ground white pepper
* 2 tablespoons water
* Salt, to taste
* Fresh lime wedges, for serving
For the noodles:
* 1 cup rice stick noodles (preferably fresh)
For the protein:
* 1 cup mixed protein (shrimp, chicken, tofu, or a combination)
For the vegetables:
* 1 cup bean sprouts
* 1 cup sliced carrots
* 1 cup sliced red bell pepper
* 2 green onions, thinly sliced
* 1/4 cup chopped peanuts
* 2 tablespoons chopped fresh cilantro (optional)
## Instructions
1. Prepare the sauce: In a blender or food processor, combine tamarind paste, fish sauce, palm sugar, soy sauce, rice vinegar, garlic, ginger, and white pepper. Blend until smooth. Heat the sauce in a saucepan over medium heat, stirring constantly, until it thickens slightly. Remove from heat and stir in vegetable oil and water. Season with salt to taste.
2. Cook the noodles: Soak the rice stick noodles in hot water for about 5-7 minutes, or according to package instructions. Drain and set aside.
3. Prepare the protein: Cut the protein into bite-sized pieces and cook according to your preference (e.g., grill, sauté, or boil). Set aside.
4. Sauté the vegetables: Heat 2 tablespoons of vegetable oil in a large wok or frying pan over medium-high heat. Add the bean sprouts, carrots, and red bell pepper. Cook, stirring constantly, until the vegetables are tender-crisp.
5. Assemble the Pad Thai: Add the cooked noodles, protein, and sauce to the wok or frying pan. Stir-fry everything together for about 2-3 minutes, until the noodles are well coated with the sauce.
6. Garnish and serve: Transfer the Pad Thai to a serving platter or individual plates. Sprinkle with green onions, peanuts, and cilantro (if using). Serve with fresh lime wedges on the side.
Tips and Variations:
* Use a variety of protein sources, such as shrimp, chicken, tofu, or a combination.
* Add other vegetables, such as mushrooms, zucchini, or baby corn.
* Use different types of noodles, such as rice vermicelli or egg noodles.
* Adjust the level of spiciness to your liking by adding more or less chili flakes.
* Serve with a sprinkle of toasted sesame seeds or chopped scallions for added flavor and texture.
Try it out for yourself, taste it and adjust to your liking. The best part of a good Pad Thai is making it your own!
"""
    PROMPT = f"Convert this description to a fixed list of ingredients and a list of steps in the ingredients: {food_description}"
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": PROMPT},
    ]

    progress(0.8, desc="Generating recipe...")
    try:
        completion = sn_client.chat.completions.create(model=language_model, messages=messages)
        response = completion.choices[0].message.content
        thinking_time = time.time() - start_time
        progress(0.9, desc="Recipe generation complete")
        return response, thinking_time
    except Exception as e:
        # Surface the failure through Gradio's error type, keeping the
        # original exception chained for debugging.
        error_message = f"Error: {str(e)}"
        raise gr.Error(error_message) from e
def process_image(image, api_key, vision_model, language_model, progress=gr.Progress()):
    """Run the full image -> description -> recipe pipeline for the UI.

    Args:
        image: The uploaded/captured image, or ``None`` when the image
            component has been cleared.
        api_key: SambaNova API key entered by the user.
        vision_model: Name of the vision model used to describe the image.
        language_model: Name of the language model used to write the recipe.
        progress: Gradio progress tracker used to report status updates.

    Returns:
        A ``(description, recipe, timing_text)`` tuple matching the three
        output components wired to this handler. All three are empty strings
        when no image is present.

    Raises:
        gr.Error: If the API key is missing, or if either model call fails.
    """
    # This handler is bound to the image's ``change`` event, so it can be
    # called with no image (e.g. after the user clears it). There is nothing
    # to analyze in that case; reset the outputs instead of crashing.
    if image is None:
        return "", "", ""

    # ``gr.Error`` (rather than a bare ValueError) matches the helper
    # functions and is what Gradio displays to the user as a message.
    if not (api_key or "").strip():
        raise gr.Error("Please enter your SambaNova API key")

    progress(0, desc="Starting image processing...")
    progress(0.1, desc="Analyzing image...")
    # Describe the food in the image
    description, food_desc_time = describe_food_in_image(image, api_key, vision_model, progress)

    progress(0.5, desc="Generating recipe...")
    # Generate recipe from description
    recipe, recipe_gen_time = desc_to_ingredients_recipe(description, api_key, language_model, progress)

    progress(1.0, desc="Process complete")
    total_time = food_desc_time + recipe_gen_time
    return description, recipe, f"Time to response: {total_time:.2f} seconds"
# Define the Gradio interface using Blocks.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# without indentation; in particular, confirm whether `recipe_output` belongs
# in the right-hand column or below the row.
with gr.Blocks() as iface:
    # Page header and usage instructions shown above the controls.
    gr.Markdown(
        """
# Image to Recipe using SambaNova APIs 🚀
**Ever go to a restaurant or see some food on TV and think "I could make this at home!". Well now just upload or take a picture of your food and get a recipe to try!**
*To use this, follow the instructions below:*
1. Navigate to <a href="https://cloud.sambanova.ai">https://cloud.sambanova.ai</a>, login and copy your API Key
2. Paste it in the SambaNova API Key box
3. (optional) Select a different vision or language model
4. Click the camera to use your device's camera, or upload button and select an image file, or simply paste from clipboard
5. It'll automatically start processing, wait for a few seconds for the LVM and LLM on SambaNova to run
6. Read through and try your recipe 😋
""", container=True
    )
    with gr.Row():
        # Left column: user inputs (credentials, image, model selection).
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                type="password",
                label="SambaNova API Key",
                placeholder="Enter your API key here",
                container=True
            )
            image_input = gr.Image(
                type="pil",
                label="Food Image",
                container=True
            )
            vision_model = gr.Dropdown(
                choices=['Llama-3.2-11B-Vision-Instruct', 'Llama-3.2-90B-Vision-Instruct'],
                value='Llama-3.2-11B-Vision-Instruct',
                label="Select a vision model"
            )
            language_model = gr.Dropdown(
                choices=["Meta-Llama-3.1-405B-Instruct","Meta-Llama-3.1-70B-Instruct", "Meta-Llama-3.1-8B-Instruct"],
                value='Meta-Llama-3.1-8B-Instruct',
                label="Select a language model"
            )
        # Right column: read-only results.
        with gr.Column(scale=1):
            food_description = gr.Textbox(
                label="Food Description",
                lines=3,
                interactive=False
            )
            processing_time = gr.Textbox(
                label="Processing Time",
                interactive=False
            )
            recipe_output = gr.Markdown(
                label="Recipe", value="Recipe", container=True
            )
    # There is no submit button: the pipeline runs whenever the image
    # component's value changes.
    image_input.change(
        fn=process_image,
        inputs=[image_input, api_key_input, vision_model, language_model],
        outputs=[food_description, recipe_output, processing_time],
        api_name="process_image"
    )
# Launch the app
iface.launch()