jykoh committed
Commit 8d32b88 · Parent: b027f26

Fix paths, only generate if required.
Files changed (2):
  1. app.py +10 -11
  2. gill/models.py +2 -2
app.py CHANGED

@@ -53,7 +53,7 @@ def upload_image(state, image_input):
     input_image = Image.open(image_input.name).resize(
         (224, 224)).convert('RGB')
     input_image.save(image_input.name)  # Overwrite with smaller image.
-    conversation += [(f'<img src="/file={image_input.name}" style="display: inline-block;">', "")]
+    conversation += [(f'<img src="https://huggingface.co/spaces/jykoh/gill/file={image_input.name}" style="display: inline-block;">', "")]
     return [conversation, chat_history + [input_image, ""]], conversation


@@ -117,11 +117,11 @@ def generate_for_prompt(input_text, state, ret_scale_factor, num_words, temperat
    if p['decision'] is not None and p['decision'][0] == 'gen':
        image = p['gen'][0][0].resize((512, 512))
        filename = save_image_to_local(image)
-       response += f'<img src="/file={filename}" style="display: inline-block;"><p style="font-size: 12px; color: #555;">(Generated)</p>'
+       response += f'<img src="https://huggingface.co/spaces/jykoh/gill/file={filename}" style="display: inline-block;"><p style="font-size: 12px; color: #555;">(Generated)</p>'
    else:
        image = p['ret'][0][0].resize((512, 512))
        filename = save_image_to_local(image)
-       response += f'<img src="/file={filename}" style="display: inline-block;"><p style="font-size: 12px; color: #555;">(Retrieved)</p>'
+       response += f'<img src="https://huggingface.co/spaces/jykoh/gill/file={filename}" style="display: inline-block;"><p style="font-size: 12px; color: #555;">(Retrieved)</p>'


    chat_history = model_inputs + \

@@ -137,24 +137,23 @@ def generate_for_prompt(input_text, state, ret_scale_factor, num_words, temperat

 with gr.Blocks(css=css) as demo:
     gr.HTML("""
-        <h1>🧀 FROMAGe</h1>
-        <p>This is the official Gradio demo for the FROMAGe model, a model that can process arbitrarily interleaved image and text inputs, and produce image and text outputs.</p>
+        <h1>🐟 GILL</h1>
+        <p>This is the official Gradio demo for the GILL model, a model that can process arbitrarily interleaved image and text inputs, and produce image and text outputs.</p>

-        <strong>Paper:</strong> <a href="https://arxiv.org/abs/2301.13823" target="_blank">Grounding Language Models to Images for Multimodal Generation</a>
+        <strong>Paper:</strong> <a href="https://arxiv.org/abs/2305.17216" target="_blank">Generating Images with Multimodal Language Models</a>
         <br/>
-        <strong>Project Website:</strong> <a href="https://jykoh.com/fromage" target="_blank">FROMAGe Website</a>
+        <strong>Project Website:</strong> <a href="https://jykoh.com/gill" target="_blank">GILL Website</a>
         <br/>
-        <strong>Code and Models:</strong> <a href="https://github.com/kohjingyu/fromage" target="_blank">GitHub</a>
+        <strong>Code and Models:</strong> <a href="https://github.com/kohjingyu/gill" target="_blank">GitHub</a>
         <br/>
         <br/>

         <strong>Tips:</strong>
         <ul>
-        <li>Start by inputting either image or text prompts (or both) and chat with FROMAGe to get image-and-text replies.</li>
+        <li>Start by inputting either image or text prompts (or both) and chat with GILL to get image-and-text replies.</li>
         <li>Tweak the level of sensitivity to images and text using the parameters on the right.</li>
-        <li>FROMAGe <i>retrieves</i> images from a database, and doesn't generate novel images, and will not be able to return images outside those in Conceptual Captions.</li>
         <li>Check out cool conversations in the examples or community tab for inspiration and share your own!</li>
-        <li>For faster inference without waiting in queue, you may duplicate the space and use your own GPU: <a href="https://huggingface.co/spaces/jykoh/fromage?duplicate=true"><img style="display: inline-block; margin-top: 0em; margin-bottom: 0em" src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></li>
+        <li>For faster inference without waiting in queue, you may duplicate the space and use your own GPU: <a href="https://huggingface.co/spaces/jykoh/gill?duplicate=true"><img style="display: inline-block; margin-top: 0em; margin-bottom: 0em" src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></li>
         </ul>
     """)
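All three app.py edits make the same change: the `<img>` tags emitted into the chat now point at the absolute Space URL rather than the relative `/file=` route Gradio uses to serve local files, so the images still resolve when the chat HTML is rendered outside the app's own origin. Below is a minimal sketch of factoring that repeated prefix into one helper; `SPACE_URL` and `img_html` are hypothetical names, not part of the commit, which inlines the prefix into each f-string.

```python
# Hypothetical refactor (not from the commit): build the chat <img> markup
# in one place so the Space URL prefix is written only once.
SPACE_URL = 'https://huggingface.co/spaces/jykoh/gill'  # the Space this demo runs in

def img_html(path: str, caption: str = '') -> str:
    # Gradio serves local files through its /file= route; prefixing the
    # absolute Space URL keeps the link valid outside the app's origin.
    html = f'<img src="{SPACE_URL}/file={path}" style="display: inline-block;">'
    if caption:
        html += f'<p style="font-size: 12px; color: #555;">({caption})</p>'
    return html

# e.g. the edited lines would become: response += img_html(filename, 'Generated')
```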
gill/models.py CHANGED

@@ -729,8 +729,8 @@ class GILL(nn.Module):

        gen_emb = gen_emb.repeat(self.num_gen_images, 1, 1)  # (self.num_gen_images, 77, 768)

-       # OPTIM(jykoh): Only generate if scores are low.
-       if self.load_sd:
+       # Only generate if we are showing a generated image.
+       if self.load_sd and image_outputs['decision'][0] == 'gen':
            # If num_gen_images > 8, split into multiple batches (for GPU memory reasons).
            gen_max_bs = 8
            gen_images = []
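The gill/models.py change turns the stale `OPTIM` note into the actual optimization: Stable Diffusion decoding now runs only when the decision head picked `'gen'`, so retrieval-only replies skip the generation pass entirely. The following standalone sketch shows the gated, batched pattern under the assumption that the loop body elided by the diff calls the generator once per batch; `maybe_generate` and `generate_batch` are illustrative names, not the repo's API.

```python
from typing import Any, Callable, List

# Illustrative sketch (names not from the repo): run the expensive image
# generator only when its output will actually be shown, in batches of at
# most gen_max_bs prompts for GPU memory reasons.
def maybe_generate(
    load_sd: bool,
    decision: str,
    gen_emb: List[Any],
    generate_batch: Callable[[List[Any]], List[Any]],
    gen_max_bs: int = 8,
) -> List[Any]:
    gen_images: List[Any] = []
    if load_sd and decision == 'gen':  # the new guard introduced by this commit
        for start in range(0, len(gen_emb), gen_max_bs):
            # One generator call per batch of up to gen_max_bs embeddings.
            gen_images.extend(generate_batch(gen_emb[start:start + gen_max_bs]))
    return gen_images
```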