Update app.py
app.py CHANGED
@@ -13,11 +13,7 @@ processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM_converted_4")
 
 model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceTB/SmolVLM_converted_4",
         torch_dtype=torch.bfloat16,
-
-        ).to("cuda")
-
-BAD_WORDS_IDS = processor.tokenizer(["<image>", "<fake_token_around_image>", "<row_", "apiro", "\u2500lrow_", "row_1"], add_special_tokens=False).input_ids
-EOS_WORDS_IDS = [processor.tokenizer.eos_token_id]
+        _attn_implementation="flash_attention_2").to("cuda")
 
 @spaces.GPU
 def model_inference(
@@ -78,9 +74,9 @@ def model_inference(
 
 
 with gr.Blocks(fill_height=True) as demo:
-    gr.Markdown("##
-    gr.Markdown("Play with [
-    gr.Markdown("**Disclaimer:**
+    gr.Markdown("## SmolVLM 🐶")
+    gr.Markdown("Play with [HuggingFaceTB/SmolVLM](https://huggingface.co/HuggingFaceTB/SmolVLM) in this demo. To get started, upload an image and text or try one of the examples.")
+    gr.Markdown("**Disclaimer:** SmolVLM does not include an RLHF alignment stage, so it may not consistently follow prompts or handle complex tasks. However, this doesn't mean it is incapable of doing so. Adding a prefix to the assistant's response, such as `Let's think step by step` for a reasoning question or `<html>` for HTML code generation, can significantly improve the output in practice. You can also play with parameters such as the temperature in non-greedy mode.")
     with gr.Column():
         image_input = gr.Image(label="Upload your Image", type="pil", scale=1)
         query_input = gr.Textbox(label="Prompt")
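For reference, here is a minimal sketch of the model-loading section as it stands after the first hunk. It assumes the imports already present in app.py (torch, transformers with Idefics3 support) plus the flash-attn package and a CUDA GPU; without flash-attn installed, this attention implementation will fail to load.

import torch
from transformers import AutoProcessor, Idefics3ForConditionalGeneration

# Processor and model as loaded in the updated app.py.
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM_converted_4")
model = Idefics3ForConditionalGeneration.from_pretrained(
    "HuggingFaceTB/SmolVLM_converted_4",
    torch_dtype=torch.bfloat16,
    _attn_implementation="flash_attention_2",  # requires the flash-attn package
).to("cuda")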
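The new disclaimer suggests prefixing the assistant's response and tuning the temperature in non-greedy mode. Below is a rough sketch of how that could look, reusing the processor and model loaded in the sketch above; the image URL, the question, the prefix, and the temperature value are all illustrative and not part of the app.

from PIL import Image
import requests

# Build a chat-formatted prompt and seed the assistant's turn with a prefix.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "How many animals are in the picture?"},
        ],
    }
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
prompt += "Let's think step by step."  # illustrative prefix for a reasoning question

# Illustrative image; any PIL image works here.
image = Image.open(requests.get("https://example.com/sample.jpg", stream=True).raw)
inputs = processor(text=prompt, images=[image], return_tensors="pt").to("cuda")

# Non-greedy decoding: enable sampling and adjust the temperature.
generated_ids = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])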