# Spaces: Runtime error  (status banner text captured with this listing; not part of the program)
import io | |
import time | |
import gradio as gr | |
import base64 | |
import openai | |
from openai import OpenAI | |
def describe_food_in_image(image, api_key, vision_model, progress=gr.Progress()):
    """Ask a vision model to briefly describe the food shown in ``image``.

    The image is encoded as PNG, embedded in a base64 ``data:`` URL and sent,
    together with a short text question, to the chat-completions endpoint at
    ``https://api.sambanova.ai/v1``.

    Args:
        image: Object exposing ``save(fp, format=...)``; the UI in this file
            supplies it from ``gr.Image(type="pil")``.
        api_key: API key passed to the OpenAI-compatible client.
        vision_model: Name of the model to query.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            kept in the signature so Gradio can inject its tracker.

    Returns:
        A ``(description, elapsed_seconds)`` tuple, where ``elapsed_seconds``
        is the wall-clock time spent inside this function.

    Raises:
        gr.Error: If no image was supplied, or if the API request (or reading
            its response) fails; the underlying exception is chained.
    """
    start_time = time.time()

    # Fail with a readable UI error instead of an AttributeError on
    # ``None.save`` further down.
    if image is None:
        raise gr.Error("Error: no image was provided")

    progress(0, desc="Initializing image analysis...")
    sn_dev_client = OpenAI(
        base_url="https://api.sambanova.ai/v1",
        api_key=api_key,
    )

    progress(0.2, desc="Converting image to base64...")
    # Serialize the image to PNG in memory, then base64-encode it for the
    # data URL below.
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")

    progress(0.3, desc="Preparing API request...")
    # A single user turn carrying both the question and the inline image.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What type of food is shown in this image? Please provide a brief description.",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}",
                    },
                },
            ],
        }
    ]

    progress(0.4, desc="Analyzing image...")
    try:
        response = sn_dev_client.chat.completions.create(
            model=vision_model,
            messages=messages,
            max_tokens=150,
        )
        output = response.choices[0].message.content
    except Exception as e:
        # Surface any client/transport/response failure in the Gradio UI
        # while keeping the original exception as the cause.
        raise gr.Error(f"Error: {str(e)}") from e

    progress(1.0, desc="Image analysis complete")
    thinking_time = time.time() - start_time
    return output, thinking_time
def desc_to_ingredients_recipe(food_description, api_key, language_model, progress=gr.Progress()):
    """Turn a textual food description into a Markdown recipe via an LLM.

    A fixed system prompt (step budget, output format and a worked Pad Thai
    example) is sent together with a user prompt that embeds
    ``food_description`` to the chat-completions endpoint at
    ``https://api.sambanova.ai/v1``.

    Args:
        food_description: Text describing the dish.
        api_key: API key passed to the OpenAI-compatible client.
        language_model: Name of the model to query.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            kept in the signature so Gradio can inject its tracker.

    Returns:
        A ``(recipe_markdown, elapsed_seconds)`` tuple, where
        ``elapsed_seconds`` is the wall-clock time spent inside this function.

    Raises:
        gr.Error: If ``food_description`` is empty, or if the API request (or
            reading its response) fails; the underlying exception is chained.
    """
    start_time = time.time()

    # Without a description the prompt below would be meaningless; report it
    # in the UI instead of spending an API call on it.
    if not food_description or not food_description.strip():
        raise gr.Error("Error: no food description was provided")

    progress(0.6, desc="Initializing recipe generation...")
    # Sambanova API base URL
    sn_api_base = "https://api.sambanova.ai/v1"
    sn_client = OpenAI(api_key=api_key, base_url=sn_api_base)

    progress(0.7, desc="Preparing recipe prompt...")
    # Maximum number of reasoning steps the model is told it may use.
    budget = 10  # between 1-100
    # The prompt text is kept flush-left on purpose: any indentation here
    # would become part of the string sent to the model.
    system_prompt = f"""
You are a helpful assistant in normal conversation.
When given a problem to solve, you are an expert problem-solving assistant.
Your task is to provide a detailed, step-by-step solution to a given question with the following budget of {budget}.
Follow these instructions carefully:
1. Read the given question carefully
2. Generate a detailed, logical step-by-step solution.
3. Each step should be formatted as in the example below.
4. You are allowed to use at most {budget} steps (starting budget),
keep track of it by counting down from the budget.
STOP GENERATING MORE STEPS when hitting 0.
5. Do a self-reflection when you are unsure about how to proceed,
based on the self-reflection and reward, decides whether you need to return
to the previous steps.
6. After completing the solution steps, reorganize and synthesize the steps
into the final answer as in the format given below and do not mention the budget or steps you took.
7. Provide a critical, honest and subjective self-evaluation of your reasoning
process at the end.
Output should be in Markdown format, for example if generating a recipe it would look like this:
# Recipe
Hi there! Today you'll learn about how to cook a delicious Thai delicacy, Pad Thai! For this you'll need
## Ingredients
For the sauce:
* 1/2 cup tamarind paste
* 1/4 cup fish sauce
* 1/4 cup palm sugar (or brown sugar)
* 2 tablespoons soy sauce
* 2 tablespoons rice vinegar
* 1 tablespoon vegetable oil
* 2 cloves garlic, minced
* 1 tablespoon grated fresh ginger
* 1/4 teaspoon ground white pepper
* 2 tablespoons water
* Salt, to taste
* Fresh lime wedges, for serving
For the noodles:
* 1 cup rice stick noodles (preferably fresh)
For the protein:
* 1 cup mixed protein (shrimp, chicken, tofu, or a combination)
For the vegetables:
* 1 cup bean sprouts
* 1 cup sliced carrots
* 1 cup sliced red bell pepper
* 2 green onions, thinly sliced
* 1/4 cup chopped peanuts
* 2 tablespoons chopped fresh cilantro (optional)
## Instructions
1. Prepare the sauce: In a blender or food processor, combine tamarind paste, fish sauce, palm sugar, soy sauce, rice vinegar, garlic, ginger, and white pepper. Blend until smooth. Heat the sauce in a saucepan over medium heat, stirring constantly, until it thickens slightly. Remove from heat and stir in vegetable oil and water. Season with salt to taste.
2. Cook the noodles: Soak the rice stick noodles in hot water for about 5-7 minutes, or according to package instructions. Drain and set aside.
3. Prepare the protein: Cut the protein into bite-sized pieces and cook according to your preference (e.g., grill, sauté, or boil). Set aside.
4. Sauté the vegetables: Heat 2 tablespoons of vegetable oil in a large wok or frying pan over medium-high heat. Add the bean sprouts, carrots, and red bell pepper. Cook, stirring constantly, until the vegetables are tender-crisp.
5. Assemble the Pad Thai: Add the cooked noodles, protein, and sauce to the wok or frying pan. Stir-fry everything together for about 2-3 minutes, until the noodles are well coated with the sauce.
6. Garnish and serve: Transfer the Pad Thai to a serving platter or individual plates. Sprinkle with green onions, peanuts, and cilantro (if using). Serve with fresh lime wedges on the side.
Tips and Variations:
* Use a variety of protein sources, such as shrimp, chicken, tofu, or a combination.
* Add other vegetables, such as mushrooms, zucchini, or baby corn.
* Use different types of noodles, such as rice vermicelli or egg noodles.
* Adjust the level of spiciness to your liking by adding more or less chili flakes.
* Serve with a sprinkle of toasted sesame seeds or chopped scallions for added flavor and texture.
Try it out for yourself, taste it and adjust to your liking. The best part of a good Pad Thai is making it your own!
"""
    user_prompt = f"Convert this description to a fixed list of ingredients and a list of steps in the ingredients: {food_description}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    progress(0.8, desc="Generating recipe...")
    try:
        completion = sn_client.chat.completions.create(model=language_model, messages=messages)
        response = completion.choices[0].message.content
    except Exception as e:
        # Surface any client/transport/response failure in the Gradio UI
        # while keeping the original exception as the cause.
        raise gr.Error(f"Error: {str(e)}") from e

    thinking_time = time.time() - start_time
    progress(0.9, desc="Recipe generation complete")
    return response, thinking_time
def process_image(image, api_key, vision_model, language_model, progress=gr.Progress()):
    """Run the full image -> description -> recipe pipeline for the UI.

    Args:
        image: Image to analyze, or ``None`` when there is nothing to process
            (this function is wired to the image component's change event,
            which also fires when the image is cleared).
        api_key: SambaNova API key entered by the user.
        vision_model: Model name used to describe the image.
        language_model: Model name used to write the recipe.
        progress: Gradio progress tracker, forwarded to both pipeline stages.

    Returns:
        A ``(description, recipe, timing_message)`` tuple of strings. All
        three are empty when ``image`` is ``None``.

    Raises:
        ValueError: If ``api_key`` is missing or blank.
    """
    # A cleared image is not a request to process anything: blank the outputs
    # instead of pushing ``None`` into the vision stage.
    if image is None:
        return "", "", ""

    # ``not api_key`` also covers ``None``, which has no ``.strip()``.
    if not api_key or not api_key.strip():
        raise ValueError("Please enter your SambaNova API key")

    progress(0, desc="Starting image processing...")
    progress(0.1, desc="Analyzing image...")
    # Stage 1: describe the food in the image.
    description, food_desc_time = describe_food_in_image(image, api_key, vision_model, progress)

    progress(0.5, desc="Generating recipe...")
    # Stage 2: generate a recipe from that description.
    recipe, recipe_gen_time = desc_to_ingredients_recipe(description, api_key, language_model, progress)

    progress(1.0, desc="Process complete")
    total_time = food_desc_time + recipe_gen_time
    return description, recipe, f"Time to response: {total_time:.2f} seconds"
# Define the Gradio interface using Blocks.
# NOTE(review): the original indentation was lost in this listing; the nesting
# below (inputs in the first column, all outputs in the second) is the most
# plausible reading and should be confirmed against the deployed layout.
with gr.Blocks() as iface:
    # Intro and usage instructions. The text is kept flush-left so the string
    # content does not depend on the surrounding code indentation.
    gr.Markdown(
        """
# Image to Recipe using SambaNova APIs 🚀
**Ever go to a restaurant or see some food on TV and think "I could make this at home!". Well now just upload or take a picture of your food and get a recipe to try!**
*To use this, follow the instructions below:*
1. Navigate to <a href="https://cloud.sambanova.ai">https://cloud.sambanova.ai</a>, login and copy your API Key
2. Paste it in the SambaNova API Key box
3. (optional) Select a different vision or language model
4. Click the camera to use your device's camera, or upload button and select an image file, or simply paste from clipboard
5. It'll automatically start processing, wait for a few seconds for the LVM and LLM on SambaNova to run
6. Read through and try your recipe 😋
""",
        container=True,
    )

    with gr.Row():
        # Input column: credentials, image and model selection.
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                type="password",
                label="SambaNova API Key",
                placeholder="Enter your API key here",
                container=True,
            )
            image_input = gr.Image(
                type="pil",
                label="Food Image",
                container=True,
            )
            vision_model = gr.Dropdown(
                choices=['Llama-3.2-11B-Vision-Instruct', 'Llama-3.2-90B-Vision-Instruct'],
                value='Llama-3.2-11B-Vision-Instruct',
                label="Select a vision model",
            )
            language_model = gr.Dropdown(
                choices=["Meta-Llama-3.1-405B-Instruct", "Meta-Llama-3.1-70B-Instruct", "Meta-Llama-3.1-8B-Instruct"],
                value='Meta-Llama-3.1-8B-Instruct',
                label="Select a language model",
            )

        # Output column: description, timing and the generated recipe.
        with gr.Column(scale=1):
            food_description = gr.Textbox(
                label="Food Description",
                lines=3,
                interactive=False,
            )
            processing_time = gr.Textbox(
                label="Processing Time",
                interactive=False,
            )
            recipe_output = gr.Markdown(
                label="Recipe", value="Recipe", container=True
            )

    # There is no submit button: the pipeline runs whenever the image
    # component's value changes.
    image_input.change(
        fn=process_image,
        inputs=[image_input, api_key_input, vision_model, language_model],
        outputs=[food_description, recipe_output, processing_time],
        api_name="process_image",
    )

# Launch the app
iface.launch()