File size: 10,269 Bytes
1d99a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import io
import time
import gradio as gr
import base64
import openai
from openai import OpenAI

def describe_food_in_image(image, api_key, vision_model, progress=gr.Progress()):
    """Ask a SambaNova-hosted vision model to describe the food in an image.

    The image is serialized to PNG, base64-encoded and sent as a data URL in
    an OpenAI-compatible chat completion request.

    Args:
        image: Image object exposing ``save(fp, format=...)``. The Gradio
            input in this file is declared with ``type="pil"``.
        api_key: SambaNova API key used to authenticate the request.
        vision_model: Name of the vision model to query.
        progress: Gradio progress tracker updated as each step starts.

    Returns:
        A ``(description, elapsed_seconds)`` tuple. ``description`` is the
        model's text reply (an empty string if the reply had no content) and
        ``elapsed_seconds`` is the wall-clock time since function entry.

    Raises:
        gr.Error: If no image was supplied or the API request fails.
    """
    start_time = time.time()

    # Fail with a readable message instead of an AttributeError on
    # ``image.save`` when there is nothing to analyze.
    if image is None:
        raise gr.Error("Error: no image was provided")

    progress(0, desc="Initializing image analysis...")
    sn_dev_client = OpenAI(
        base_url="https://api.sambanova.ai/v1",
        api_key=api_key,
    )

    progress(0.2, desc="Converting image to base64...")
    # Serialize to an in-memory PNG so the image can travel as a data URL.
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")

    progress(0.3, desc="Preparing API request...")
    # One user turn carrying both the question and the image.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What type of food is shown in this image? Please provide a brief description.",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}",
                    },
                },
            ],
        }
    ]

    progress(0.4, desc="Analyzing image...")
    try:
        response = sn_dev_client.chat.completions.create(
            model=vision_model,
            messages=messages,
            max_tokens=150,
        )
        # Normalize a missing reply to "" so callers always receive a string.
        output = response.choices[0].message.content or ""
    except Exception as e:
        # Surface the failure in the Gradio UI while keeping the original
        # exception attached as the cause for debugging.
        raise gr.Error(f"Error: {str(e)}") from e

    progress(1.0, desc="Image analysis complete")
    thinking_time = time.time() - start_time
    return output, thinking_time

def desc_to_ingredients_recipe(food_description, api_key, language_model, progress=gr.Progress()):
    """Turn a textual food description into a Markdown recipe via a SambaNova LLM.

    A system prompt containing a step budget and a worked Markdown example is
    sent together with the description to an OpenAI-compatible chat
    completion endpoint.

    Args:
        food_description: Text describing the dish to write a recipe for.
        api_key: SambaNova API key used to authenticate the request.
        language_model: Name of the language model to query.
        progress: Gradio progress tracker updated as each step starts.

    Returns:
        A ``(recipe, elapsed_seconds)`` tuple. ``recipe`` is the model's reply
        (an empty string if the reply had no content) and ``elapsed_seconds``
        is the wall-clock time since function entry.

    Raises:
        gr.Error: If the description is empty or the API request fails.
    """
    start_time = time.time()

    # Avoid spending an API call on a prompt that has nothing to convert.
    if not food_description or not str(food_description).strip():
        raise gr.Error("Error: no food description was provided")

    progress(0.6, desc="Initializing recipe generation...")
    # Sambanova API base URL
    SN_API_BASE = "https://api.sambanova.ai/v1"
    sn_client = OpenAI(api_key=api_key, base_url=SN_API_BASE)
    progress(0.7, desc="Preparing recipe prompt...")
    # Maximum number of reasoning steps the model is told it may use (1-100).
    budget = 10
    SYSTEM_PROMPT = f"""
You are a helpful assistant in normal conversation.
When given a problem to solve, you are an expert problem-solving assistant. 
Your task is to provide a detailed, step-by-step solution to a given question with the following budget of {budget}. 
Follow these instructions carefully:
1. Read the given question carefully 
2. Generate a detailed, logical step-by-step solution.
3. Each step should be formatted as in the example below. 
4. You are allowed to use at most {budget} steps (starting budget), 
   keep track of it by counting down from the budget.  
   STOP GENERATING MORE STEPS when hitting 0. 
5. Do a self-reflection when you are unsure about how to proceed, 
   based on the self-reflection and reward, decides whether you need to return 
   to the previous steps.
6. After completing the solution steps, reorganize and synthesize the steps 
   into the final answer as in the format given below and do not mention the budget or steps you took.
7. Provide a critical, honest and subjective self-evaluation of your reasoning 
   process at the end. 

Output should be in Markdown format, for example if generating a recipe it would look like this: 

# Recipe
Hi there! Today you'll learn about how to cook a delicious Thai delicacy, Pad Thai! For this you'll need

## Ingredients 
For the sauce:
* 1/2 cup tamarind paste
* 1/4 cup fish sauce
* 1/4 cup palm sugar (or brown sugar)
* 2 tablespoons soy sauce
* 2 tablespoons rice vinegar
* 1 tablespoon vegetable oil
* 2 cloves garlic, minced
* 1 tablespoon grated fresh ginger
* 1/4 teaspoon ground white pepper
* 2 tablespoons water
* Salt, to taste
* Fresh lime wedges, for serving

For the noodles:
* 1 cup rice stick noodles (preferably fresh)

For the protein:
* 1 cup mixed protein (shrimp, chicken, tofu, or a combination)

For the vegetables:
* 1 cup bean sprouts
* 1 cup sliced carrots
* 1 cup sliced red bell pepper
* 2 green onions, thinly sliced
* 1/4 cup chopped peanuts
* 2 tablespoons chopped fresh cilantro (optional)

## Instructions 
1. Prepare the sauce: In a blender or food processor, combine tamarind paste, fish sauce, palm sugar, soy sauce, rice vinegar, garlic, ginger, and white pepper. Blend until smooth. Heat the sauce in a saucepan over medium heat, stirring constantly, until it thickens slightly. Remove from heat and stir in vegetable oil and water. Season with salt to taste.
2. Cook the noodles: Soak the rice stick noodles in hot water for about 5-7 minutes, or according to package instructions. Drain and set aside.
3. Prepare the protein: Cut the protein into bite-sized pieces and cook according to your preference (e.g., grill, sauté, or boil). Set aside.
4. Sauté the vegetables: Heat 2 tablespoons of vegetable oil in a large wok or frying pan over medium-high heat. Add the bean sprouts, carrots, and red bell pepper. Cook, stirring constantly, until the vegetables are tender-crisp.
5. Assemble the Pad Thai: Add the cooked noodles, protein, and sauce to the wok or frying pan. Stir-fry everything together for about 2-3 minutes, until the noodles are well coated with the sauce.
6. Garnish and serve: Transfer the Pad Thai to a serving platter or individual plates. Sprinkle with green onions, peanuts, and cilantro (if using). Serve with fresh lime wedges on the side.

Tips and Variations:
* Use a variety of protein sources, such as shrimp, chicken, tofu, or a combination.
* Add other vegetables, such as mushrooms, zucchini, or baby corn.
* Use different types of noodles, such as rice vermicelli or egg noodles.
* Adjust the level of spiciness to your liking by adding more or less chili flakes.
* Serve with a sprinkle of toasted sesame seeds or chopped scallions for added flavor and texture.

Try it out for yourself, taste it and adjust to your liking. The best part of a good Pad Thai is making it your own!
"""

    PROMPT = f"Convert this description to a fixed list of ingredients and a list of steps in the ingredients: {food_description}"
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": PROMPT},
    ]

    progress(0.8, desc="Generating recipe...")
    try:
        completion = sn_client.chat.completions.create(model=language_model, messages=messages)
        # Normalize a missing reply to "" so callers always receive a string.
        response = completion.choices[0].message.content or ""
    except Exception as e:
        # Surface the failure in the Gradio UI while keeping the original
        # exception attached as the cause for debugging.
        raise gr.Error(f"Error: {str(e)}") from e

    thinking_time = time.time() - start_time
    progress(0.9, desc="Recipe generation complete")
    return response, thinking_time

def process_image(image, api_key, vision_model, language_model, progress=gr.Progress()):
    """Run the full pipeline: describe the food in an image, then write a recipe.

    Args:
        image: Image to analyze, or ``None`` when no image is present.
        api_key: SambaNova API key; surrounding whitespace is ignored.
        vision_model: Name of the vision model used for the description.
        language_model: Name of the language model used for the recipe.
        progress: Gradio progress tracker shared with both pipeline steps.

    Returns:
        A ``(description, recipe, timing_message)`` tuple. When ``image`` is
        ``None`` the tuple is ``("", "Recipe", "")``, which puts the outputs
        back to their blank/placeholder state.

    Raises:
        ValueError: If an image is present but the API key is missing or blank.
    """
    # This handler is bound to the image component's change event, so it is
    # also invoked with image=None when the image is removed. Reset the
    # outputs instead of failing; "Recipe" mirrors the initial value of the
    # recipe Markdown component declared in this file.
    if image is None:
        return "", "Recipe", ""

    # Tolerate a None key and keys pasted with stray whitespace.
    api_key = (api_key or "").strip()
    if not api_key:
        raise ValueError("Please enter your SambaNova API key")
    progress(0, desc="Starting image processing...")

    progress(0.1, desc="Analyzing image...")
    # Describe the food in the image
    description, food_desc_time = describe_food_in_image(image, api_key, vision_model, progress)

    progress(0.5, desc="Generating recipe...")
    # Generate recipe from description
    recipe, recipe_gen_time = desc_to_ingredients_recipe(description, api_key, language_model, progress)

    progress(1.0, desc="Process complete")
    total_time = food_desc_time + recipe_gen_time
    return description, recipe, f"Time to response: {total_time:.2f} seconds"

# Define the Gradio interface using Blocks.
# Layout: an instructions banner on top, then two columns: inputs (API key,
# image, model pickers) on the left and outputs (description, timing, recipe)
# on the right.
with gr.Blocks() as iface:
    gr.Markdown(
        """
        # Image to Recipe using SambaNova APIs 🚀
        
        **Ever go to a restaurant or see some food on TV and think "I could make this at home!". Well now just upload or take a picture of your food and get a recipe to try!**

        *To use this, follow the instructions below:*

        1. Navigate to <a href="https://cloud.sambanova.ai">https://cloud.sambanova.ai</a>, login and copy your API Key
        2. Paste it in the SambaNova API Key box
        3. (optional) Select a different vision or language model
        4. Click the camera to use your device's camera, or upload button and select an image file, or simply paste from clipboard
        5. It'll automatically start processing, wait for a few seconds for the LVM and LLM on SambaNova to run
        6. Read through and try your recipe 😋
        """, container=True
    )
    
    with gr.Row():
        # Left column: user inputs.
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                type="password", 
                label="SambaNova API Key", 
                placeholder="Enter your API key here",
                container=True
            )
            image_input = gr.Image(
                type="pil", 
                label="Food Image",
                container=True
            )
            vision_model = gr.Dropdown(
                choices=['Llama-3.2-11B-Vision-Instruct', 'Llama-3.2-90B-Vision-Instruct'],
                value='Llama-3.2-11B-Vision-Instruct',  
                label="Select a vision model"
            )
            language_model = gr.Dropdown(
                choices=["Meta-Llama-3.1-405B-Instruct","Meta-Llama-3.1-70B-Instruct", "Meta-Llama-3.1-8B-Instruct"],
                value='Meta-Llama-3.1-8B-Instruct',  
                label="Select a language model"
            )
            
        # Right column: read-only results.
        with gr.Column(scale=1):
            food_description = gr.Textbox(
                label="Food Description",
                lines=3,
                interactive=False
            )
            processing_time = gr.Textbox(
                label="Processing Time",
                interactive=False
            )
            recipe_output = gr.Markdown(
                label="Recipe", value="Recipe", container=True
            )
    
    # There is no submit button: the pipeline runs whenever the image value
    # changes. The output order must match process_image's return tuple
    # (description, recipe, timing message).
    image_input.change(
        fn=process_image,
        inputs=[image_input, api_key_input, vision_model, language_model],
        outputs=[food_description, recipe_output, processing_time],
        api_name="process_image"
    )

# Launch the app
iface.launch()