apratim24 committed on
Commit 59d06f0 · verified · 1 Parent(s): 36c597f

Update app.py

Files changed (1)
  1. app.py +11 -84
app.py CHANGED
@@ -1,88 +1,15 @@
+
+ import gradio as gr
  from langchain_openai import OpenAI
+ from transformers import pipeline
+ from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
+
  import os
  openai_api_key = os.getenv("OPENAI_API_KEY")

- import io
- import base64
- import requests
- from PIL import Image
-
- width = 1000
-
- # Function to resize image maintaining aspect ratio with a maximum width of 1000 pixels
- def resize_image(image, max_width=width):
-     with Image.open(image) as img:
-         ratio = max_width / img.width
-         new_height = int(img.height * ratio)
-         resized_img = img.resize((max_width, new_height), Image.ANTIALIAS)
-         img_byte_arr = io.BytesIO()
-         resized_img.save(img_byte_arr, format=img.format)
-         return img_byte_arr.getvalue()
-
- # Function to encode the image to base64
- def encode_image(image):
-     resized_image_bytes = resize_image(image)  # Resize the image
-     return base64.b64encode(resized_image_bytes).decode('utf-8')
-
- # Function to call the API for image and get the response
- def get_response_for_image(openai_api_key, image):
-     base64_image = encode_image(image)
-     headers = {
-         "Content-Type": "application/json",
-         "Authorization": f"Bearer {openai_api_key}"
-     }
-     payload = {
-         "model": "gpt-4o",
-         "messages": [
-             {
-                 "role": "user",
-                 "content": [
-                     {
-                         "type": "text",
-                         "text": '''Describe or caption the image within 20 words.'''
-                     },
-                     {
-                         "type": "image_url",
-                         "image_url": {
-                             "url": f"data:image/jpeg;base64,{base64_image}",
-                             "detail": "low"
-                         }
-                     }
-                 ]
-             }
-         ],
-         "max_tokens": 500
-     }
-     response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
-     return response['choices'][0]['message']['content']
-
-
- def generate_story(image, theme, genre, word_count):
-     try:
-
-
-
-         # Decode the caption
-         caption_text = get_response_for_image(openai_api_key, image)
-
-         # Generate story based on the caption
-         story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."
-
-         llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
-         story = llm.invoke(story_prompt)
-
-         return caption_text, story
-     except Exception as e:
-         return f"An error occurred during inference: {str(e)}"
-
-
- # Using open source models ----------------------------------------------------
-
- '''
- from transformers import pipeline, AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
-
  # Load text generation model
- text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")
+ # text_generation_model = pipeline("text-generation", model="openai-community/gpt2-large")
+ # text_generation_model = pipeline("text-generation", model="distilbert/distilgpt2")

  # Load image captioning model
  encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
@@ -93,6 +20,7 @@ feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
  tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
  model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)

+
  def generate_story(image, theme, genre, word_count):
      try:
          # Preprocess the image
@@ -108,15 +36,15 @@ def generate_story(image, theme, genre, word_count):
          # Generate story based on the caption
          story_prompt = f"Write an interesting {theme} story in the {genre} genre. The story should be within {word_count} words about {caption_text}."

-         story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]
+         llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)
+         story = llm.invoke(story_prompt)
+         # story = text_generation_model(story_prompt, max_length=150)[0]["generated_text"]

          return caption_text, story
      except Exception as e:
          return f"An error occurred during inference: {str(e)}"
- '''


- # -------------------------------------------------------------------------

  # Gradio interface
  input_image = gr.Image(label="Select Image",type="pil")
@@ -137,4 +65,3 @@ gr.Interface(
      title="Image to Story Generator",
      description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
  ).launch()
-
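
For orientation, the hunks above elide most of the new generate_story body (new lines 26-35) and the gr.Interface wiring (new lines 51-64). A minimal sketch of how those elided parts typically look, assuming standard nlpconnect/vit-gpt2-image-captioning and Gradio usage; the generate keyword values and the input/output widgets are illustrative assumptions, not the committed code:

# Sketch only: reconstructed from the surrounding context lines, not taken from the commit.
# Caption the PIL image with the ViT-GPT2 captioning model loaded above (inside generate_story).
pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
output_ids = model.generate(pixel_values, max_length=20, num_beams=4)
caption_text = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]

# Wire the Gradio interface around generate_story; the widget choices are assumptions.
gr.Interface(
    fn=generate_story,
    inputs=[input_image,
            gr.Textbox(label="Theme"),
            gr.Textbox(label="Genre"),
            gr.Slider(label="Word Count", minimum=50, maximum=300, step=10)],
    outputs=[gr.Textbox(label="Caption"), gr.Textbox(label="Story")],
    title="Image to Story Generator",
    description="Generate a story from an image taking theme and genre as input. It leverages image captioning and text generation models.",
).launch()

The caption produced in the first block is what feeds the story_prompt shown at new lines 36-37, which gpt-3.5-turbo-instruct then expands into the story.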