GeorgiosIoannouCoder committed on
Commit
703a1fe
•
1 Parent(s): 20046d1

Create app.py

Files changed (1)
  1. app.py +531 -0
app.py ADDED
#############################################################################################################################
# Filename : app.py
# Description: A Streamlit application to generate recipes given an image of a food and an image of ingredients.
# Author : Georgios Ioannou
#
# Copyright © 2024 by Georgios Ioannou
#############################################################################################################################
# Import libraries.

import openai  # gpt-3.5-turbo model inference.
import os  # Load environment variable(s).
import requests  # Send HTTP POST requests to Hugging Face models for inference.
import streamlit as st  # Build the GUI of the application.
import torch  # Load Salesforce/blip model(s) on GPU.


from dotenv import load_dotenv, find_dotenv  # Read local .env file.
from langchain.chat_models import ChatOpenAI  # Access to the OpenAI gpt-3.5-turbo model.
from langchain.chains import LLMChain  # Chain to run queries against LLMs.

# A prompt template. It accepts a set of parameters from the user that can be used to generate a prompt for a language model.
from langchain.prompts import PromptTemplate
from PIL import Image  # Open and identify a given image file.
from transformers import BlipProcessor, BlipForQuestionAnswering  # VQA model inference.

#############################################################################################################################
# Load environment variable(s).

load_dotenv(find_dotenv())  # Read local .env file.
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
openai.api_key = os.getenv("OPENAI_API_KEY")

#############################################################################################################################
# Function to apply local CSS.


def local_css(file_name):
    with open(file_name) as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

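
# For reference only: local_css expects a stylesheet at styles/style.css, which is not
# part of this commit. A minimal placeholder (the selector and values here are made up)
# could look like:
#
# /* styles/style.css */
# h1, h2, h3 {
#     letter-spacing: 0.05rem;
# }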

#############################################################################################################################
# Load the Visual Question Answering (VQA) model directly.
# Using transformers.


@st.cache_resource
def load_model():
    blip_processor_base = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    blip_model_base = BlipForQuestionAnswering.from_pretrained(
        "Salesforce/blip-vqa-base"
    )

    # Backup model.
    # blip_processor_large = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
    # blip_model_large = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
    # return blip_processor_large, blip_model_large

    return blip_processor_base, blip_model_base


#############################################################################################################################
# General function for any Salesforce/blip model(s).
# VQA model.


def generate_answer_blip(processor, model, image, question):
    # Prepare image + question and move the tensors to the same device as the model.

    inputs = processor(images=image, text=question, return_tensors="pt").to(device)

    generated_ids = model.generate(**inputs, max_length=50)

    generated_answer = processor.batch_decode(generated_ids, skip_special_tokens=True)

    return generated_answer
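
# Illustrative usage of generate_answer_blip (commented out; "food.jpg" is a made-up
# local file, and blip_processor_base/blip_model_base are loaded further below):
#
# img = Image.open("food.jpg").convert("RGB")
# answer = generate_answer_blip(blip_processor_base, blip_model_base, img, "What is the food in the picture?")
# print(answer)  # A list with one decoded string, e.g. ["pizza"].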


#############################################################################################################################
# Generate answer from the Salesforce/blip model(s).
# VQA model.


@st.cache_resource
def generate_answer(image, question):
    answer_blip_base = generate_answer_blip(
        processor=blip_processor_base,
        model=blip_model_base,
        image=image,
        question=question,
    )

    # answer_blip_large = generate_answer_blip(blip_processor_large, blip_model_large, image, question)
    # return answer_blip_large

    return answer_blip_base


#############################################################################################################################
# Detect ingredients on an image.
# Object detection model.


@st.cache_resource
def generate_ingredients(image):
    API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50"

    headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}

    with open(image, "rb") as img:  # Here, image is the path of the uploaded file on disk.
        data = img.read()
    response = requests.post(url=API_URL, data=data, headers=headers)
    ingredients = response.json()
    return ingredients
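
# For reference, a successful response from the object-detection endpoint is a JSON
# list of detections; the labels, scores, and boxes below are made up for illustration:
#
# [
#     {"score": 0.99, "label": "broccoli", "box": {"xmin": 12, "ymin": 30, "xmax": 210, "ymax": 180}},
#     {"score": 0.97, "label": "carrot", "box": {"xmin": 220, "ymin": 40, "xmax": 330, "ymax": 150}},
# ]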


#############################################################################################################################
# Return the recipe generated by the model for the food and ingredients detected by the previous models.
# Using LangChain.


@st.cache_resource
def generate_recipe(food, ingredients, chef):
    # Model used here: "gpt-3.5-turbo".

    # The template can be customized to meet one's needs such as:
    # Generate a recipe, generate a scenario, and generate lyrics of a song.

    template = """
    You are a chef.
    You must sound like {chef}.
    You must make use of these ingredients: {ingredients}.
    Generate a detailed recipe step by step based on the above constraints for this food: {food}.
    """

    prompt = PromptTemplate(
        template=template, input_variables=["food", "ingredients", "chef"]
    )

    recipe_llm = LLMChain(
        llm=ChatOpenAI(
            model_name="gpt-3.5-turbo", temperature=0
        ),  # Increasing the temperature makes the model more creative but slower at inference.
        prompt=prompt,
        verbose=True,  # Print intermediate values to the console.
    )

    recipe = recipe_llm.predict(
        food=food, ingredients=ingredients, chef=chef
    )  # Format the prompt with kwargs and pass it to the LLM.

    return recipe
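
# Illustrative only: with food="pizza", ingredients=["broccoli", "carrot"], and
# chef="Gordon Ramsay", the formatted prompt sent to gpt-3.5-turbo reads:
#
#     You are a chef.
#     You must sound like Gordon Ramsay.
#     You must make use of these ingredients: ['broccoli', 'carrot'].
#     Generate a detailed recipe step by step based on the above constraints for this food: pizza.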


#############################################################################################################################
# Return the speech generated by the model for the recipe.
# Using the Inference API.


def generate_speech(response):
    # Model used here: "facebook/mms-tts-eng".
    # Backup model: "espnet/kan-bayashi_ljspeech_vits".

    # API_URL = (
    #     "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    # )
    API_URL = "https://api-inference.huggingface.co/models/facebook/mms-tts-eng"

    headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}

    payload = {"inputs": response}

    response = requests.post(url=API_URL, headers=headers, json=payload)

    with open("audio.flac", "wb") as file:
        file.write(response.content)

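
# Note: while the hosted model is cold-starting, the endpoint may return a JSON error
# (typically HTTP 503) instead of audio bytes. A minimal guard inside generate_speech,
# placed before writing audio.flac, could look like (illustrative sketch only):
#
# if not response.ok:
#     st.error(response.json().get("error", "Text-to-speech request failed."))
#     return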


#############################################################################################################################
# Conversation with the OpenAI gpt-3.5-turbo model.


def get_completion_from_messages(messages, model="gpt-3.5-turbo", temperature=0):
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,  # This is the degree of randomness of the model's output.
    )
    # print(str(response.choices[0].message))
    return response.choices[0].message["content"]
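
# Illustrative only: the messages argument follows the OpenAI chat format, a list of
# role/content dictionaries accumulated over the conversation, e.g.:
#
# [
#     {"role": "system", "content": "You are ChefBot. You must sound like Gordon Ramsay."},
#     {"role": "user", "content": "Clarify step 1."},
# ]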


#############################################################################################################################
# Page title and favicon.

st.set_page_config(page_title="ChefBot | Recipe Generator/Assistant", page_icon="🍴")

#############################################################################################################################
# Load the Salesforce/blip model directly.

if torch.cuda.is_available():
    device = torch.device("cuda")
# elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
#     device = torch.device("mps")
else:
    device = torch.device("cpu")

blip_processor_base, blip_model_base = load_model()
blip_model_base.to(device)

#############################################################################################################################
# Define the chefs for the dropdown menu.

chefs = [
    "Gordon Ramsay",
    "Donald Trump",
    "Cardi B",
]

#############################################################################################################################
# Main function to create the Streamlit web application.


def main():
    try:
        #####################################################################################################################

        # Load CSS.

        local_css("styles/style.css")

        #####################################################################################################################

        # Title.

        title = """<h1 align="center" style="font-family: monospace; font-size: 2.1rem; margin-top: -4rem">
        ChefBot - Recipe Generator/Assistant</h1>"""
        st.markdown(title, unsafe_allow_html=True)
        # st.title("ChefBot - Automated Recipe Assistant")

        #####################################################################################################################

        # Subtitle.

        subtitle = """<h2 align="center" style="font-family: monospace; font-size: 1.5rem; margin-top: -2rem">
        CUNY Tech Prep Tutorial 2</h2>"""
        st.markdown(subtitle, unsafe_allow_html=True)

        #####################################################################################################################

        # Image.

        image = "./ctp.png"
        left_co, cent_co, last_co = st.columns(3)
        with cent_co:
            st.image(image=image)

        #####################################################################################################################

        # Heading 1.

        heading1 = """<h3 align="center" style="font-family: monospace; font-size: 1.5rem; margin-top: 1rem">
        Food</h3>"""
        st.markdown(heading1, unsafe_allow_html=True)

        #####################################################################################################################

        # Upload an image.

        uploaded_file_food = st.file_uploader(
            label="Choose an image:",
            key="food",
            help="An image of the food that you want a recipe for.",
        )

        #####################################################################################################################

        if uploaded_file_food is not None:
            # Display the uploaded image.

            bytes_data = uploaded_file_food.getvalue()
            with open(uploaded_file_food.name, "wb") as file:
                file.write(bytes_data)
            st.image(
                uploaded_file_food, caption="Uploaded Image.", use_column_width=True
            )

            raw_image = Image.open(uploaded_file_food.name).convert("RGB")

            #################################################################################################################

            # VQA model inference.

            with st.spinner(
                text="Detecting food..."
            ):  # Spinner to keep the application interactive.
                # Model inference.

                answer = generate_answer(raw_image, "Is there a food in the picture?")[
                    0
                ]

            if answer == "yes":
                st.success(f"Food detected? {answer}", icon="❓")
                question = "What is the food in the picture?"
                food = generate_answer(image=raw_image, question=question)[0]
                st.success(f"Food detected: {food}", icon="✅")

            #################################################################################################################

            # Heading 2.

            heading2 = """<h3 align="center" style="font-family: monospace; font-size: 1.5rem; margin-top: 1rem">
            Ingredients</h3>"""
            st.markdown(heading2, unsafe_allow_html=True)

            #################################################################################################################

            # Upload an image.

            uploaded_file_ingredients = st.file_uploader(
                label="Choose an image:",
                key="ingredients",
                help="An image of the ingredients that you want to use.",
            )

            #################################################################################################################

            if uploaded_file_ingredients is not None:
                # Display the uploaded image.

                bytes_data = uploaded_file_ingredients.getvalue()
                with open(uploaded_file_ingredients.name, "wb") as file:
                    file.write(bytes_data)
                st.image(
                    uploaded_file_ingredients,
                    caption="Uploaded Image.",
                    use_column_width=True,
                )

                #############################################################################################################

                # Object detection model inference.

                with st.spinner(
                    text="Detecting Ingredients..."
                ):  # Spinner to keep the application interactive.
                    # Model inference.
                    ingredients_list = generate_ingredients(
                        image=uploaded_file_ingredients.name
                    )

                #############################################################################################################

                # Display/Output the ingredients detected.

                ingredients = []
                st.success("Ingredients:", icon="📝")
                for i, ingredient_dict in enumerate(ingredients_list):
                    ingredients.append(ingredient_dict["label"])
                    st.write(i + 1, ingredient_dict["label"])

                #############################################################################################################

                # Heading 3.

                heading3 = """<h3 align="center" style="font-family: monospace; font-size: 1.5rem; margin-top: 1rem">
                ChefBot</h3>"""
                st.markdown(heading3, unsafe_allow_html=True)

                #############################################################################################################

                # Dropdown menu.

                chef = st.selectbox(
                    label="Select your chef:",
                    options=chefs,
                    help="Select your chef.",
                )

                #############################################################################################################

                # Generate Recipe button.

                col1, col2, col3 = st.columns(3)
                with col2:
                    button_recipe = st.button("Generate Recipe")

                #############################################################################################################

                if button_recipe:
                    #########################################################################################################
                    # LangChain + OpenAI gpt-3.5-turbo model inference.

                    with st.spinner(
                        text="Generating Recipe..."
                    ):  # Spinner to keep the application interactive.
                        # Model inference.

                        recipe = generate_recipe(
                            food=food, ingredients=ingredients, chef=chef
                        )

                    #########################################################################################################
                    # Store the recipe in session state for future runs.

                    st.session_state["recipe"] = recipe

                    #########################################################################################################
                    # Text-to-speech model inference.

                    with st.spinner(
                        text="Generating Audio..."
                    ):  # Spinner to keep the application interactive.
                        # Model inference.

                        generate_speech(response=recipe)

                    #########################################################################################################
                    # Display/Output the generated recipe in text and audio.

                    with st.expander(label="Recipe"):
                        st.write(recipe)
                        st.audio("audio.flac")

                    #########################################################################################################

                    # st.write(st.session_state)

                #############################################################################################################
                # Conversation with ChefBot.

                if "recipe" in st.session_state:
                    #########################################################################################################

                    # Context for the ChefBot. Context is used to accumulate messages.

                    context = [
                        {
                            "role": "system",
                            "content": f"""
                            You are ChefBot, an automated service to guide users on how to cook step by step.
                            You must sound like {chef}.
                            You must first greet the user.
                            You must help the user step by step with this recipe: {st.session_state['recipe']}.
                            After you have given all of the steps of the recipe,
                            you must thank the user and ask for user feedback both on the recipe and on your personality.
                            Do NOT repeat the steps of any recipe during the conversation with the user.""",
                        }
                    ]
                    #########################################################################################################

                    # User input.

                    user_input = st.text_input(
                        label="User Input:",
                        key="user_input",
                        help="Follow up with the chef for any questions on the recipe.",
                        placeholder="Clarify step 1.",
                    )

                    #########################################################################################################

                    # Chat and Reset Chat buttons.

                    col1, col2, col3, col4, col5 = st.columns(5)
                    with col1:
                        button_chat = st.button("Chat")
                    with col5:
                        if st.button("Reset Chat"):
                            st.session_state.panels = []
                            user_input = False
                    #########################################################################################################

                    # st.session_state.panels stores the conversation; it is displayed in reverse (newest message first).

                    if "panels" not in st.session_state:
                        st.session_state.panels = []

                    #########################################################################################################

                    # If there is a user input or the Chat button was clicked AND the input is not empty.

                    if (user_input or button_chat) and user_input != "":
                        # Context management.
                        prompt = user_input
                        context.append({"role": "user", "content": f"{prompt}"})

                        # OpenAI gpt-3.5-turbo model inference.
                        with st.spinner(text="Generating Response..."):
                            response = get_completion_from_messages(context)

                        # Text-to-speech model inference.
                        with st.spinner(text="Generating Audio..."):
                            generate_speech(response=response)

                        # Context management.
                        context.append({"role": "assistant", "content": f"{response}"})

                        # Append the new messages to the panels used to display the conversation.
                        st.session_state.panels.append(("User:", prompt))
                        st.session_state.panels.append(("Assistant:", response))

                    #########################################################################################################

                    # Display/Output messages.

                    with st.expander("Conversation History", expanded=True):
                        for role, content in reversed(st.session_state.panels):
                            # User.
                            if role == "User:":
                                user = f"""<p align="left" style="font-family: monospace; font-size: 1rem;">
                                <b style="color:#dadada">👀{role}</b> {content}</p>"""
                                st.markdown(user, unsafe_allow_html=True)
                            # ChefBot.
                            else:
                                st.audio("audio.flac")
                                assistant = f"""<p align="left" style="font-family: monospace; font-size: 1rem;">
                                <b style="color:#dadada">👨‍🍳{chef}:</b> {content}</p>"""
                                st.markdown(assistant, unsafe_allow_html=True)

    #####################################################################################################################
    except Exception as e:
        # General exception/error handling.

        st.error(e)


#############################################################################################################################
if __name__ == "__main__":
    main()