aifeifei798 committed on
Commit
aedde4e
·
verified ·
1 Parent(s): e008325

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +14 -0
  2. feifeilib/feifeismolvlm.py +58 -0
  3. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from feifeilib.feifeismolvlm import feifeismolvlm

# Minimal Gradio UI: one tab with an image input, a submit button, and a
# textbox that receives the streamed description from feifeismolvlm.
with gr.Blocks() as demo:
    gr.Markdown("Image To Flux Prompt (SmolVLM)")
    with gr.Tab(label="Image To Flux Prompt"):
        input_img = gr.Image(
            label="Input Picture",
            height=320,
            type="filepath",  # feifeismolvlm expects a file path, not an array
        )
        submit_btn = gr.Button(value="Submit")
        output_text = gr.Textbox(label="Flux Prompt")

    # Wire the button: the generator function streams partial text into the box.
    submit_btn.click(
        fn=feifeismolvlm,
        inputs=[input_img],
        outputs=[output_text],
    )

# Bind to all interfaces so the app is reachable inside a container/Space.
demo.launch(server_name="0.0.0.0")
feifeilib/feifeismolvlm.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer
3
+ from transformers.image_utils import load_image
4
+ from threading import Thread
5
+ import re
6
+ import time
7
+ import torch
8
+ import spaces
9
+
10
# Load the processor and model once at import time so every request reuses the
# same weights instead of re-loading per call.
# NOTE(review): verify the repo id "HuggingFaceTB/SmolVLM-Instruct-250M" exists
# on the Hub — the published checkpoints are named slightly differently.
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct-250M")
# bfloat16 halves the memory footprint versus fp32; the model is moved to the
# GPU eagerly, so importing this module requires a CUDA device.
model = AutoModelForVision2Seq.from_pretrained("HuggingFaceTB/SmolVLM-Instruct-250M",
                                               torch_dtype=torch.bfloat16,
                                               #_attn_implementation="flash_attention_2"
                                               ).to("cuda")
15
+
16
@spaces.GPU
def feifeismolvlm(input_img):
    """Stream a text description of an image using SmolVLM.

    Args:
        input_img: Path to the input image file (Gradio `type="filepath"`).

    Yields:
        str: A placeholder "..." first, then the growing generated description
        after each new token arrives from the streamer.
    """
    text = "Describe this image."
    images = [load_image(input_img)]

    # One {"type": "image"} placeholder per image, followed by the text prompt.
    resulting_messages = [
        {
            "role": "user",
            "content": [{"type": "image"} for _ in range(len(images))]
            + [{"type": "text", "text": text}],
        }
    ]
    prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
    # The processor expects a batch of image lists, hence the extra nesting.
    inputs = processor(text=prompt, images=[images], return_tensors="pt")
    inputs = inputs.to("cuda")

    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)

    # BUG FIX: the original code built a generation-args dict with
    # no_repeat_ngram_size / min_new_tokens / num_return_sequences and then
    # immediately overwrote it with dict(inputs, streamer=..., max_new_tokens=500),
    # silently discarding those settings. Build the dict once so every intended
    # parameter actually reaches model.generate.
    generation_args = dict(
        inputs,
        streamer=streamer,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        max_new_tokens=500,
        min_new_tokens=10,
    )

    # Run generation on a worker thread so we can consume the streamer here.
    thread = Thread(target=model.generate, kwargs=generation_args)
    thread.start()

    # Immediate placeholder so the UI shows activity before the first token.
    yield "..."
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        time.sleep(0.01)  # tiny pause smooths the streamed UI updates
        yield buffer
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ spaces