jykoh committed
Commit 8d32b88 · Parent: b027f26

Fix paths, only generate if required.
Files changed (2):
  1. app.py +10 -11
  2. gill/models.py +2 -2
app.py CHANGED

@@ -53,7 +53,7 @@ def upload_image(state, image_input):
     input_image = Image.open(image_input.name).resize(
         (224, 224)).convert('RGB')
     input_image.save(image_input.name)  # Overwrite with smaller image.
-    conversation += [(f'<img src="/file={image_input.name}" style="display: inline-block;">', "")]
+    conversation += [(f'<img src="https://huggingface.co/spaces/jykoh/gill/file={image_input.name}" style="display: inline-block;">', "")]
     return [conversation, chat_history + [input_image, ""]], conversation


@@ -117,11 +117,11 @@ def generate_for_prompt(input_text, state, ret_scale_factor, num_words, temperat
    if p['decision'] is not None and p['decision'][0] == 'gen':
        image = p['gen'][0][0].resize((512, 512))
        filename = save_image_to_local(image)
-       response += f'<img src="/file={filename}" style="display: inline-block;"><p style="font-size: 12px; color: #555;">(Generated)</p>'
+       response += f'<img src="https://huggingface.co/spaces/jykoh/gill/file={filename}" style="display: inline-block;"><p style="font-size: 12px; color: #555;">(Generated)</p>'
    else:
        image = p['ret'][0][0].resize((512, 512))
        filename = save_image_to_local(image)
-       response += f'<img src="/file={filename}" style="display: inline-block;"><p style="font-size: 12px; color: #555;">(Retrieved)</p>'
+       response += f'<img src="https://huggingface.co/spaces/jykoh/gill/file={filename}" style="display: inline-block;"><p style="font-size: 12px; color: #555;">(Retrieved)</p>'


    chat_history = model_inputs + \

@@ -137,24 +137,23 @@ def generate_for_prompt(input_text, state, ret_scale_factor, num_words, temperat

 with gr.Blocks(css=css) as demo:
     gr.HTML("""
-        <h1>🧀 FROMAGe</h1>
-        <p>This is the official Gradio demo for the FROMAGe model, a model that can process arbitrarily interleaved image and text inputs, and produce image and text outputs.</p>
+        <h1>🐟 GILL</h1>
+        <p>This is the official Gradio demo for the GILL model, a model that can process arbitrarily interleaved image and text inputs, and produce image and text outputs.</p>

-        <strong>Paper:</strong> <a href="https://arxiv.org/abs/2301.13823" target="_blank">Grounding Language Models to Images for Multimodal Generation</a>
+        <strong>Paper:</strong> <a href="https://arxiv.org/abs/2305.17216" target="_blank">Generating Images with Multimodal Language Models</a>
         <br/>
-        <strong>Project Website:</strong> <a href="https://jykoh.com/fromage" target="_blank">FROMAGe Website</a>
+        <strong>Project Website:</strong> <a href="https://jykoh.com/gill" target="_blank">GILL Website</a>
         <br/>
-        <strong>Code and Models:</strong> <a href="https://github.com/kohjingyu/fromage" target="_blank">GitHub</a>
+        <strong>Code and Models:</strong> <a href="https://github.com/kohjingyu/gill" target="_blank">GitHub</a>
         <br/>
         <br/>

         <strong>Tips:</strong>
         <ul>
-        <li>Start by inputting either image or text prompts (or both) and chat with FROMAGe to get image-and-text replies.</li>
+        <li>Start by inputting either image or text prompts (or both) and chat with GILL to get image-and-text replies.</li>
         <li>Tweak the level of sensitivity to images and text using the parameters on the right.</li>
-        <li>FROMAGe <i>retrieves</i> images from a database, and doesn't generate novel images, and will not be able to return images outside those in Conceptual Captions.</li>
         <li>Check out cool conversations in the examples or community tab for inspiration and share your own!</li>
-        <li>For faster inference without waiting in queue, you may duplicate the space and use your own GPU: <a href="https://huggingface.co/spaces/jykoh/fromage?duplicate=true"><img style="display: inline-block; margin-top: 0em; margin-bottom: 0em" src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></li>
+        <li>For faster inference without waiting in queue, you may duplicate the space and use your own GPU: <a href="https://huggingface.co/spaces/jykoh/gill?duplicate=true"><img style="display: inline-block; margin-top: 0em; margin-bottom: 0em" src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></li>
         </ul>
     """)
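All three app.py edits make the same change: the `<img>` tags emitted into the chat now point at the absolute Space URL rather than the relative `/file=` route Gradio uses to serve local files, so the images still resolve when the chat HTML is rendered outside the app's own origin. Below is a minimal sketch of factoring that repeated prefix into one helper; `SPACE_URL` and `img_html` are hypothetical names, not part of the commit, which inlines the prefix into each f-string.

```python
# Hypothetical refactor (not from the commit): build the chat <img> markup
# in one place so the Space URL prefix is written only once.
SPACE_URL = 'https://huggingface.co/spaces/jykoh/gill'  # the Space this demo runs in

def img_html(path: str, caption: str = '') -> str:
    # Gradio serves local files through its /file= route; prefixing the
    # absolute Space URL keeps the link valid outside the app's origin.
    html = f'<img src="{SPACE_URL}/file={path}" style="display: inline-block;">'
    if caption:
        html += f'<p style="font-size: 12px; color: #555;">({caption})</p>'
    return html

# e.g. the edited lines would become: response += img_html(filename, 'Generated')
```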
gill/models.py CHANGED

@@ -729,8 +729,8 @@ class GILL(nn.Module):

        gen_emb = gen_emb.repeat(self.num_gen_images, 1, 1)  # (self.num_gen_images, 77, 768)

-       # OPTIM(jykoh): Only generate if scores are low.
-       if self.load_sd:
+       # Only generate if we are showing a generated image.
+       if self.load_sd and image_outputs['decision'][0] == 'gen':
            # If num_gen_images > 8, split into multiple batches (for GPU memory reasons).
            gen_max_bs = 8
            gen_images = []
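The gill/models.py change turns the stale `OPTIM` note into the actual optimization: Stable Diffusion decoding now runs only when the decision head picked `'gen'`, so retrieval-only replies skip the generation pass entirely. The following standalone sketch shows the gated, batched pattern under the assumption that the loop body elided by the diff calls the generator once per batch; `maybe_generate` and `generate_batch` are illustrative names, not the repo's API.

```python
from typing import Any, Callable, List

# Illustrative sketch (names not from the repo): run the expensive image
# generator only when its output will actually be shown, in batches of at
# most gen_max_bs prompts for GPU memory reasons.
def maybe_generate(
    load_sd: bool,
    decision: str,
    gen_emb: List[Any],
    generate_batch: Callable[[List[Any]], List[Any]],
    gen_max_bs: int = 8,
) -> List[Any]:
    gen_images: List[Any] = []
    if load_sd and decision == 'gen':  # the new guard introduced by this commit
        for start in range(0, len(gen_emb), gen_max_bs):
            # One generator call per batch of up to gen_max_bs embeddings.
            gen_images.extend(generate_batch(gen_emb[start:start + gen_max_bs]))
    return gen_images
```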