############################################################################################################################# # Filename : app.py # Description: A Streamlit application to generate recipes given an image of a food and an image of ingredients. # Author : Georgios Ioannou # # Copyright © 2024 by Georgios Ioannou ############################################################################################################################# # Import libraries. import openai # gpt-3.5-turbo model inference. import os # Load environment variable(s). import requests # Send HTTP GET request to Hugging Face models for inference. import streamlit as st # Build the GUI of the application. import torch # Load Salesforce/blip model(s) on GPU. from dotenv import load_dotenv, find_dotenv # Read local .env file. from langchain.chat_models import ChatOpenAI # Access to OpenAI gpt-3.5-turbo model. from langchain.chains import LLMChain # Chain to run queries against LLMs. # A prompt template. It accepts a set of parameters from the user that can be used to generate a prompt for a language model. from langchain.prompts import PromptTemplate from PIL import Image # Open and identify a given image file. from transformers import BlipProcessor, BlipForQuestionAnswering # VQA model inference. ############################################################################################################################# # Load environment variable(s). load_dotenv(find_dotenv()) # Read local .env file. HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") openai.api_key = os.getenv("OPENAI_API_KEY") ############################################################################################################################# # Function to apply local CSS. 
def local_css(file_name):
    """Inject the contents of a local CSS file into the Streamlit page.

    Args:
        file_name: Path of the stylesheet to read.

    Raises:
        OSError: If the stylesheet cannot be opened.
    """
    with open(file_name, encoding="utf-8") as f:
        # The rules must be wrapped in a <style> element and rendered with
        # unsafe_allow_html=True; an empty string would apply no styling at all.
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


#############################################################################################################################
# Load the Visual Question Answering (VQA) model directly.
# Using transformers.


@st.cache_resource
def load_model():
    """Load the BLIP VQA processor and model ("Salesforce/blip-vqa-base").

    Decorated with `st.cache_resource`, so the pair is created once and
    reused across Streamlit reruns.

    Returns:
        A `(processor, model)` tuple.
    """
    blip_processor_base = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    blip_model_base = BlipForQuestionAnswering.from_pretrained(
        "Salesforce/blip-vqa-base"
    )

    # Backup model.
    # blip_processor_large = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
    # blip_model_large = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
    # return blip_processor_large, blip_model_large

    return blip_processor_base, blip_model_base


#############################################################################################################################
# General function for any Salesforce/blip model(s).
# VQA model.


def generate_answer_blip(processor, model, image, question):
    """Answer a question about an image with a Salesforce/blip VQA model.

    Args:
        processor: BLIP processor used to encode the inputs and decode the output.
        model: BLIP question-answering model used for generation.
        image: Image the question refers to.
        question: Question text.

    Returns:
        The result of `processor.batch_decode` on the generated ids, with
        special tokens removed.
    """
    # Prepare image + question.
    inputs = processor(images=image, text=question, return_tensors="pt")

    # The model is moved to the GPU at module level when CUDA is available,
    # so the encoded inputs must live on the same device as the model;
    # otherwise `generate` fails with a device-mismatch error.
    inputs = inputs.to(model.device)

    generated_ids = model.generate(**inputs, max_length=50)
    generated_answer = processor.batch_decode(generated_ids, skip_special_tokens=True)

    return generated_answer


#############################################################################################################################
# Generate answer from the Salesforce/blip model(s).
# VQA model.
@st.cache_resource
def generate_answer(image, question):
    """Answer `question` about `image` with the module-level BLIP base model.

    Args:
        image: Image the question refers to.
        question: Question text.

    Returns:
        Whatever `generate_answer_blip` returns for the base processor/model.
    """
    answer_blip_base = generate_answer_blip(
        processor=blip_processor_base,
        model=blip_model_base,
        image=image,
        question=question,
    )

    # answer_blip_large = generate_answer_blip(blip_processor_large, blip_model_large, image, question)
    # return answer_blip_large

    return answer_blip_base


#############################################################################################################################
# Upper bound, in seconds, for a single Hugging Face Inference API request.
# Without a timeout, `requests` can block the Streamlit script indefinitely.
_HF_REQUEST_TIMEOUT_SECONDS = 120

#############################################################################################################################
# Detect ingredients on an image.
# Object detection model.


@st.cache_resource
def generate_ingredients(image):
    """Send an image file to the facebook/detr-resnet-50 inference endpoint.

    Args:
        image: Path of the image file to upload.

    Returns:
        The parsed JSON body of the response.
        NOTE(review): presumably a list of detections - confirm against the endpoint.

    Raises:
        OSError: If the image file cannot be read.
        requests.RequestException: On timeout, connection failure, or a
            non-2xx HTTP status.
    """
    API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50"
    headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}

    with open(image, "rb") as img:
        data = img.read()

    response = requests.post(
        url=API_URL,
        data=data,
        headers=headers,
        timeout=_HF_REQUEST_TIMEOUT_SECONDS,
    )

    # Fail loudly on HTTP errors so an error payload is never returned (and
    # cached by st.cache_resource) as if it were a detection result.
    response.raise_for_status()

    ingredients = response.json()

    return ingredients


#############################################################################################################################
# Return the recipe generated by the model for the food and ingredients detected by the previous models.
# Using Langchain.


@st.cache_resource
def generate_recipe(food, ingredients, chef):
    """Generate a step-by-step recipe with gpt-3.5-turbo through LangChain.

    Args:
        food: Food the recipe is for.
        ingredients: Ingredients the recipe must make use of.
        chef: Persona the generated text must sound like.

    Returns:
        The text returned by `LLMChain.predict` for the formatted prompt.
    """
    # Model used here: "gpt-3.5-turbo".

    # The template can be customized to meet one's needs such as:
    # Generate a recipe, generate a scenario, and generate lyrics of a song.

    template = """
    You are a chef.
    You must sound like {chef}.
    You must make use of these ingredients: {ingredients}.
    Generate a detailed recipe step by step based on the above constraints for this food: {food}.
    """

    prompt = PromptTemplate(
        template=template, input_variables=["food", "ingredients", "chef"]
    )

    recipe_llm = LLMChain(
        llm=ChatOpenAI(
            model_name="gpt-3.5-turbo", temperature=0
        ),  # Increasing the temperature, the model becomes more creative and takes longer for inference.
        prompt=prompt,
        verbose=True,  # Print intermediate values to the console.
    )

    recipe = recipe_llm.predict(
        food=food, ingredients=ingredients, chef=chef
    )  # Format prompt with kwargs and pass to LLM.

    return recipe


#############################################################################################################################
# Return the speech generated by the model for the recipe.
# Using inference api.


def generate_speech(response):
    """Synthesize speech for a text and save it to "audio.flac".

    Args:
        response: Text to convert to speech.

    Raises:
        requests.RequestException: On timeout, connection failure, or a
            non-2xx HTTP status. In that case "audio.flac" is left untouched.
    """
    # Model used here: "facebook/mms-tts-eng".
    # Backup model: "espnet/kan-bayashi_ljspeech_vits".

    # API_URL = (
    #     "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    # )
    API_URL = "https://api-inference.huggingface.co/models/facebook/mms-tts-eng"

    headers = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}

    payload = {"inputs": response}

    # Use a distinct name for the HTTP response instead of rebinding the
    # `response` parameter, which holds the input text.
    speech_response = requests.post(
        url=API_URL,
        headers=headers,
        json=payload,
        timeout=_HF_REQUEST_TIMEOUT_SECONDS,
    )

    # Check the status before writing, so an error body is never written
    # into the audio file.
    speech_response.raise_for_status()

    with open("audio.flac", "wb") as file:
        file.write(speech_response.content)


#############################################################################################################################
# Conversation with OpenAI gpt-3.5-turbo model.


def get_completion_from_messages(messages, model="gpt-3.5-turbo", temperature=0):
    """Return the assistant's reply for a chat conversation.

    Args:
        messages: Conversation messages passed to the chat completion call.
        model: Name of the OpenAI chat model.
        temperature: Degree of randomness of the model's output.

    Returns:
        The "content" of the first choice's message.
    """
    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 `openai`
    # SDK interface - confirm the pinned package version.
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,  # This is the degree of randomness of the model's output.
    )
    # print(str(response.choices[0].message))
    return response.choices[0].message["content"]


#############################################################################################################################
# Page title and favicon.

st.set_page_config(page_title="ChefBot | Recipe Generator/Assistant", page_icon="🍴")

#############################################################################################################################
# Load the Salesforce/blip model directly.
if torch.cuda.is_available(): device = torch.device("cuda") # elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): # device = torch.device("mps") else: device = torch.device("cpu") blip_processor_base, blip_model_base = load_model() blip_model_base.to(device) ############################################################################################################################# # Define the chefs for the dropdown menu. chefs = [ "Gordon Ramsay", "Donald Trump", "Cardi B", ] ############################################################################################################################# # Main function to create the Streamlit web application. def main(): try: ##################################################################################################################### # Load CSS. local_css("styles/style.css") ##################################################################################################################### # Title. title = f"""
👤{role} {content}
""" st.markdown(user, unsafe_allow_html=True) # ChefBot. else: st.audio("audio.flac") assistant = f"""👨🍳{chef}: {content}
""" st.markdown(assistant, unsafe_allow_html=True) ############################################################################################################# except Exception as e: # General exception/error handling. st.error(e) ############################################################################################################################# if __name__ == "__main__": main()