Spaces:

apratim24
/

Image_to_Story_Generator

Runtime error

App Files Community

apratim24 committed on May 30, 2024

Commit

59d06f0

verified ·

1 Parent(s): 36c597f

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -84

app.py CHANGED Viewed

@@ -1,88 +1,15 @@
 from langchain_openai import OpenAI
 import os
 openai_api_key = os.getenv("OPENAI_API_KEY")
-import io
-import base64
-import requests
-from PIL import Image
-width = 1000
-# Function to resize image maintaining aspect ratio with a maximum width of 1000 pixels
-def resize_image(image, max_width=width):
-    with Image.open(image) as img:
-        ratio = max_width / img.width
-        new_height = int(img.height * ratio)
-        resized_img = img.resize((max_width, new_height), Image.ANTIALIAS)
-        img_byte_arr = io.BytesIO()
-        resized_img.save(img_byte_arr, format=img.format)
-        return img_byte_arr.getvalue()
-# Function to encode the image to base64
-def encode_image(image):
-    resized_image_bytes = resize_image(image)  # Resize the image
-    return base64.b64encode(resized_image_bytes).decode('utf-8')
-# Function to call the API for image and get the response
-def get_response_for_image(openai_api_key, image):
-    base64_image = encode_image(image)
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": f"Bearer {openai_api_key}"
-    }
-    payload = {
-        "model": "gpt-4o",
-        "messages": [
-          {
-            "role": "user",
-            "content": [
-              {
-                "type": "text",
-                "text": '''Describe or caption the image within 20 words.'''
-              },
-              {
-                "type": "image_url",
-                "image_url": {
-                  "url": f"data:image/jpeg;base64,{base64_image}",
-                  "detail": "low"
-                }
-              }
-            ]
-          }
-        ],
-        "max_tokens": 500
-    }
-    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
-    return response['choices'][0]['message']['content']
-def generate_story(image, theme, genre, word_count):
-    try:
-        # Decode the caption
-        caption_text = get_response_for_image(openai_api_key, image)
-        # Generate story based on the caption
-        story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
-        llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
-        story = llm.invoke(story_prompt)
-        return caption_text, story
-    except Exception as e:
-        return f"An error occurred during inference: {str(e)}"
-# Using open source models ----------------------------------------------------
-'''
-from transformers import pipeline, AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
 # Load text generation model
-text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
 # Load image captioning model
 encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
@@ -93,6 +20,7 @@ feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
 tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
 model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
 def generate_story(image, theme, genre, word_count):
     try:
         # Preprocess the image
@@ -108,15 +36,15 @@ def generate_story(image, theme, genre, word_count):
         # Generate story based on the caption
         story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
-        story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
         return caption_text, story
     except Exception as e:
         return f"An error occurred during inference: {str(e)}"
-'''
-# -------------------------------------------------------------------------
 # Gradio interface
 input_image = gr.Image(label="Select Image",type="pil")
@@ -137,4 +65,3 @@ gr.Interface(
     title="Image to Story Generator",
     description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
 ).launch()

+import gradio as gr
 from langchain_openai import OpenAI
+from transformers import pipeline
+from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
 import os
 openai_api_key = os.getenv("OPENAI_API_KEY")
 # Load text generation model
+# text_generation_model = pipeline("text-generation", model="openai-community/gpt2-large")
+# text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
 # Load image captioning model
 encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
 tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
 model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
 def generate_story(image, theme, genre, word_count):
     try:
         # Preprocess the image
         # Generate story based on the caption
         story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
+        llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
+        story = llm.invoke(story_prompt)
+        # story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
         return caption_text, story
     except Exception as e:
         return f"An error occurred during inference: {str(e)}"
 # Gradio interface
 input_image = gr.Image(label="Select Image",type="pil")
     title="Image to Story Generator",
     description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
 ).launch()