# Spaces: Runtime error
# File size: 10,269 Bytes
# 1d99a1c
import io
import time
import gradio as gr
import base64
import openai
from openai import OpenAI
def describe_food_in_image(image, api_key, vision_model, progress=gr.Progress()):
    """Ask a SambaNova vision model to describe the food shown in an image.

    The image is PNG-encoded, embedded as a base64 data URL, and sent with a
    fixed text question to the OpenAI-compatible chat completions endpoint at
    https://api.sambanova.ai/v1.

    Args:
        image: Image object exposing ``save(fp, format=...)``; the UI's image
            input is configured with ``type="pil"``.
        api_key: SambaNova API key used to authenticate the request.
        vision_model: Name of the vision model to query.
        progress: Gradio progress tracker. The fractions reported here
            (0.1-0.5) cover the first half of the image-to-recipe pipeline.

    Returns:
        A ``(description, elapsed_seconds)`` tuple: the model's reply text and
        the wall-clock time spent in this function.

    Raises:
        gr.Error: If no image was supplied, or if the API call or reading its
            response fails (the original exception is chained as the cause).
    """
    # Fail with a readable message instead of an AttributeError on image.save.
    if image is None:
        raise gr.Error("Error: no image was provided")

    start_time = time.time()
    progress(0.1, desc="Initializing image analysis...")
    sn_dev_client = OpenAI(
        base_url="https://api.sambanova.ai/v1",
        api_key=api_key,
    )

    progress(0.2, desc="Converting image to base64...")
    # Serialize the image in memory so it can be inlined in the request.
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")

    progress(0.3, desc="Preparing API request...")
    # One user message carrying both the question and the image data URL.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What type of food is shown in this image? Please provide a brief description.",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}",
                    },
                },
            ],
        }
    ]

    progress(0.4, desc="Analyzing image...")
    try:
        response = sn_dev_client.chat.completions.create(
            model=vision_model,
            messages=messages,
            max_tokens=150,
        )
        output = response.choices[0].message.content
    except Exception as e:
        # Surface the failure in the Gradio UI while keeping the root cause.
        raise gr.Error(f"Error: {e}") from e

    # Report the halfway point rather than 1.0: recipe generation still
    # follows, and reporting 100% here made the bar jump backwards afterwards.
    progress(0.5, desc="Image analysis complete")
    thinking_time = time.time() - start_time
    return output, thinking_time
def desc_to_ingredients_recipe(food_description, api_key, language_model, progress=gr.Progress(), *, budget=10):
    """Generate a Markdown recipe from a food description via a SambaNova LLM.

    Sends a system prompt (step-budgeted problem-solving instructions plus a
    worked Pad Thai example of the expected Markdown layout) and a user prompt
    containing the description to the chat completions endpoint.

    Args:
        food_description: Text describing the dish to turn into a recipe.
        api_key: SambaNova API key used to authenticate the request.
        language_model: Name of the language model to query.
        progress: Gradio progress tracker; this function reports 0.6-0.9.
        budget: Maximum number of solution steps stated in the system prompt.
            Must be between 1 and 100; defaults to 10.

    Returns:
        A ``(recipe_markdown, elapsed_seconds)`` tuple: the model's reply text
        and the wall-clock time spent in this function.

    Raises:
        ValueError: If ``budget`` is outside the 1-100 range.
        gr.Error: If the API call or reading its response fails (the original
            exception is chained as the cause).
    """
    if not 1 <= budget <= 100:
        raise ValueError("budget must be between 1 and 100")

    # Time the whole call, including client setup and prompt construction.
    start_time = time.time()
    progress(0.6, desc="Initializing recipe generation...")

    # Sambanova API base URL
    SN_API_BASE = "https://api.sambanova.ai/v1"
    sn_client = openai.OpenAI(api_key=api_key, base_url=SN_API_BASE)

    progress(0.7, desc="Preparing recipe prompt...")
    # The prompt body is kept flush-left so no indentation leaks into the text
    # sent to the model.
    SYSTEM_PROMPT = f"""
You are a helpful assistant in normal conversation.
When given a problem to solve, you are an expert problem-solving assistant.
Your task is to provide a detailed, step-by-step solution to a given question with the following budget of {budget}.
Follow these instructions carefully:
1. Read the given question carefully
2. Generate a detailed, logical step-by-step solution.
3. Each step should be formatted as in the example below.
4. You are allowed to use at most {budget} steps (starting budget),
keep track of it by counting down from the budget.
STOP GENERATING MORE STEPS when hitting 0.
5. Do a self-reflection when you are unsure about how to proceed,
based on the self-reflection and reward, decides whether you need to return
to the previous steps.
6. After completing the solution steps, reorganize and synthesize the steps
into the final answer as in the format given below and do not mention the budget or steps you took.
7. Provide a critical, honest and subjective self-evaluation of your reasoning
process at the end.
Output should be in Markdown format, for example if generating a recipe it would look like this:
# Recipe
Hi there! Today you'll learn about how to cook a delicious Thai delicacy, Pad Thai! For this you'll need
## Ingredients
For the sauce:
* 1/2 cup tamarind paste
* 1/4 cup fish sauce
* 1/4 cup palm sugar (or brown sugar)
* 2 tablespoons soy sauce
* 2 tablespoons rice vinegar
* 1 tablespoon vegetable oil
* 2 cloves garlic, minced
* 1 tablespoon grated fresh ginger
* 1/4 teaspoon ground white pepper
* 2 tablespoons water
* Salt, to taste
* Fresh lime wedges, for serving
For the noodles:
* 1 cup rice stick noodles (preferably fresh)
For the protein:
* 1 cup mixed protein (shrimp, chicken, tofu, or a combination)
For the vegetables:
* 1 cup bean sprouts
* 1 cup sliced carrots
* 1 cup sliced red bell pepper
* 2 green onions, thinly sliced
* 1/4 cup chopped peanuts
* 2 tablespoons chopped fresh cilantro (optional)
## Instructions
1. Prepare the sauce: In a blender or food processor, combine tamarind paste, fish sauce, palm sugar, soy sauce, rice vinegar, garlic, ginger, and white pepper. Blend until smooth. Heat the sauce in a saucepan over medium heat, stirring constantly, until it thickens slightly. Remove from heat and stir in vegetable oil and water. Season with salt to taste.
2. Cook the noodles: Soak the rice stick noodles in hot water for about 5-7 minutes, or according to package instructions. Drain and set aside.
3. Prepare the protein: Cut the protein into bite-sized pieces and cook according to your preference (e.g., grill, sauté, or boil). Set aside.
4. Sauté the vegetables: Heat 2 tablespoons of vegetable oil in a large wok or frying pan over medium-high heat. Add the bean sprouts, carrots, and red bell pepper. Cook, stirring constantly, until the vegetables are tender-crisp.
5. Assemble the Pad Thai: Add the cooked noodles, protein, and sauce to the wok or frying pan. Stir-fry everything together for about 2-3 minutes, until the noodles are well coated with the sauce.
6. Garnish and serve: Transfer the Pad Thai to a serving platter or individual plates. Sprinkle with green onions, peanuts, and cilantro (if using). Serve with fresh lime wedges on the side.
Tips and Variations:
* Use a variety of protein sources, such as shrimp, chicken, tofu, or a combination.
* Add other vegetables, such as mushrooms, zucchini, or baby corn.
* Use different types of noodles, such as rice vermicelli or egg noodles.
* Adjust the level of spiciness to your liking by adding more or less chili flakes.
* Serve with a sprinkle of toasted sesame seeds or chopped scallions for added flavor and texture.
Try it out for yourself, taste it and adjust to your liking. The best part of a good Pad Thai is making it your own!
"""
    PROMPT = f"Convert this description to a fixed list of ingredients and a list of steps in the ingredients: {food_description}"
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": PROMPT},
    ]

    progress(0.8, desc="Generating recipe...")
    try:
        completion = sn_client.chat.completions.create(model=language_model, messages=messages)
        response = completion.choices[0].message.content
        thinking_time = time.time() - start_time
        progress(0.9, desc="Recipe generation complete")
        return response, thinking_time
    except Exception as e:
        # Surface the failure in the Gradio UI while keeping the root cause.
        raise gr.Error(f"Error: {e}") from e
def process_image(image, api_key, vision_model, language_model, progress=gr.Progress()):
    """Run the full pipeline: describe the food image, then write a recipe.

    Args:
        image: Image to analyze, or ``None`` when the image input is empty.
        api_key: SambaNova API key entered by the user.
        vision_model: Name of the vision model used to describe the image.
        language_model: Name of the language model used to write the recipe.
        progress: Gradio progress tracker, passed on to both pipeline stages.

    Returns:
        A ``(description, recipe, timing_message)`` tuple of strings. All
        three are empty when no image was supplied.

    Raises:
        ValueError: If the API key is missing or blank.
        gr.Error: Propagated from either stage when its API call fails.
    """
    # Gradio passes None when the image input is empty (e.g. after the user
    # clears it); there is nothing to analyze, so reset the outputs instead of
    # crashing further down on ``image.save``.
    if image is None:
        return "", "", ""

    # ``not api_key`` also covers None, which has no ``strip``.
    if not api_key or not api_key.strip():
        raise ValueError("Please enter your SambaNova API key")

    progress(0, desc="Starting image processing...")
    progress(0.1, desc="Analyzing image...")
    # Stage 1: describe the food in the image.
    description, food_desc_time = describe_food_in_image(image, api_key, vision_model, progress)

    progress(0.5, desc="Generating recipe...")
    # Stage 2: generate a recipe from that description.
    recipe, recipe_gen_time = desc_to_ingredients_recipe(description, api_key, language_model, progress)

    progress(1.0, desc="Process complete")
    return description, recipe, f"Time to response: {food_desc_time + recipe_gen_time:.2f} seconds"
# Define the Gradio interface using Blocks.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been stripped; confirm that the processing-time and recipe
# outputs belong inside the second column.
with gr.Blocks() as iface:
    # Title and usage instructions shown at the top of the page.
    gr.Markdown(
        """
# Image to Recipe using SambaNova APIs 🚀
**Ever go to a restaurant or see some food on TV and think "I could make this at home!". Well now just upload or take a picture of your food and get a recipe to try!**
*To use this, follow the instructions below:*
1. Navigate to <a href="https://cloud.sambanova.ai">https://cloud.sambanova.ai</a>, login and copy your API Key
2. Paste it in the SambaNova API Key box
3. (optional) Select a different vision or language model
4. Click the camera to use your device's camera, or upload button and select an image file, or simply paste from clipboard
5. It'll automatically start processing, wait for a few seconds for the LVM and LLM on SambaNova to run
6. Read through and try your recipe 😋
""", container=True
    )
    with gr.Row():
        # Left column: credentials, image input, and model selection.
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                type="password",
                label="SambaNova API Key",
                placeholder="Enter your API key here",
                container=True
            )
            image_input = gr.Image(
                type="pil",
                label="Food Image",
                container=True
            )
            vision_model = gr.Dropdown(
                choices=['Llama-3.2-11B-Vision-Instruct', 'Llama-3.2-90B-Vision-Instruct'],
                value='Llama-3.2-11B-Vision-Instruct',
                label="Select a vision model"
            )
            language_model = gr.Dropdown(
                choices=["Meta-Llama-3.1-405B-Instruct","Meta-Llama-3.1-70B-Instruct", "Meta-Llama-3.1-8B-Instruct"],
                value='Meta-Llama-3.1-8B-Instruct',
                label="Select a language model"
            )
        # Right column: pipeline outputs.
        with gr.Column(scale=1):
            food_description = gr.Textbox(
                label="Food Description",
                lines=3,
                interactive=False
            )
            processing_time = gr.Textbox(
                label="Processing Time",
                interactive=False
            )
            recipe_output = gr.Markdown(
                label="Recipe", value="Recipe", container=True
            )
    # There is no submit button: the pipeline runs whenever the image input
    # changes.
    image_input.change(
        fn=process_image,
        inputs=[image_input, api_key_input, vision_model, language_model],
        outputs=[food_description, recipe_output, processing_time],
        api_name="process_image"
    )
# Launch the app
iface.launch()