aifeifei798 committed on
Commit
aedde4e
·
verified ·
1 Parent(s): e008325

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +14 -0
  2. feifeilib/feifeismolvlm.py +58 -0
  3. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from feifeilib.feifeismolvlm import feifeismolvlm

# Minimal Gradio UI: one tab with an image input, a submit button, and a
# textbox that receives the streamed description from feifeismolvlm.
with gr.Blocks() as demo:
    gr.Markdown("Image To Flux Prompt (SmolVLM)")
    with gr.Tab(label="Image To Flux Prompt"):
        input_img = gr.Image(
            label="Input Picture",
            height=320,
            type="filepath",  # feifeismolvlm expects a file path, not an array
        )
        submit_btn = gr.Button(value="Submit")
        output_text = gr.Textbox(label="Flux Prompt")

    # Wire the button: the generator function streams partial text into the box.
    submit_btn.click(
        fn=feifeismolvlm,
        inputs=[input_img],
        outputs=[output_text],
    )

# Bind to all interfaces so the app is reachable inside a container/Space.
demo.launch(server_name="0.0.0.0")
feifeilib/feifeismolvlm.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer
3
+ from transformers.image_utils import load_image
4
+ from threading import Thread
5
+ import re
6
+ import time
7
+ import torch
8
+ import spaces
9
+
10
# Load the processor and model once at import time so every request reuses the
# same weights instead of re-loading per call.
# NOTE(review): verify the repo id "HuggingFaceTB/SmolVLM-Instruct-250M" exists
# on the Hub — the published checkpoints are named slightly differently.
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct-250M")
# bfloat16 halves the memory footprint versus fp32; the model is moved to the
# GPU eagerly, so importing this module requires a CUDA device.
model = AutoModelForVision2Seq.from_pretrained("HuggingFaceTB/SmolVLM-Instruct-250M",
                                               torch_dtype=torch.bfloat16,
                                               #_attn_implementation="flash_attention_2"
                                               ).to("cuda")
15
+
16
@spaces.GPU
def feifeismolvlm(input_img):
    """Stream a text description of an image using SmolVLM.

    Args:
        input_img: Path to the input image file (Gradio `type="filepath"`).

    Yields:
        str: A placeholder "..." first, then the growing generated description
        after each new token arrives from the streamer.
    """
    text = "Describe this image."
    images = [load_image(input_img)]

    # One {"type": "image"} placeholder per image, followed by the text prompt.
    resulting_messages = [
        {
            "role": "user",
            "content": [{"type": "image"} for _ in range(len(images))]
            + [{"type": "text", "text": text}],
        }
    ]
    prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
    # The processor expects a batch of image lists, hence the extra nesting.
    inputs = processor(text=prompt, images=[images], return_tensors="pt")
    inputs = inputs.to("cuda")

    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)

    # BUG FIX: the original code built a generation-args dict with
    # no_repeat_ngram_size / min_new_tokens / num_return_sequences and then
    # immediately overwrote it with dict(inputs, streamer=..., max_new_tokens=500),
    # silently discarding those settings. Build the dict once so every intended
    # parameter actually reaches model.generate.
    generation_args = dict(
        inputs,
        streamer=streamer,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        max_new_tokens=500,
        min_new_tokens=10,
    )

    # Run generation on a worker thread so we can consume the streamer here.
    thread = Thread(target=model.generate, kwargs=generation_args)
    thread.start()

    # Immediate placeholder so the UI shows activity before the first token.
    yield "..."
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        time.sleep(0.01)  # tiny pause smooths the streamed UI updates
        yield buffer
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ spaces