Tonic committed on
Commit
7732f66
·
verified ·
1 Parent(s): bdc2a1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -12,7 +12,8 @@ title = """ # 🙋🏻‍♂️Welcome to Tonic's🦅Falcon Vision👁️Langua
12
  description = """
13
  Falcon2-11B-vlm is an 11B parameters causal decoder-only model built by TII and trained on over 5,000B tokens of RefinedWeb enhanced with curated corpora. To bring vision capabilities, , we integrate the pretrained CLIP ViT-L/14 vision encoder with our Falcon2-11B chat-finetuned model and train with image-text data. For enhancing the VLM's perception of fine-grained details w.r.t small objects in images, we employ a dynamic encoding mechanism at high-resolution for image inputs.
14
 
15
- Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) Math 🔍 [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [MultiTonic](https://github.com/multitonic/)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
 
16
  """
17
 
18
  processor = LlavaNextProcessor.from_pretrained("tiiuae/falcon-11B-vlm", tokenizer_class='PreTrainedTokenizerFast')
@@ -31,42 +32,51 @@ def generate_paragraph(image_url):
31
 
32
  return generated_captions
33
 
34
- # Function to set the URL and generate the paragraph
35
  def set_and_generate(url):
36
  generated_paragraph = generate_paragraph(url)
37
  return url, generated_paragraph
38
 
39
- # Create the Gradio Blocks interface
40
  with gr.Blocks() as demo:
 
41
  gr.Markdown(title)
42
  gr.Markdown(description)
43
 
 
44
  with gr.Row():
 
45
  with gr.Column():
 
46
  image_url_input = gr.Textbox(label="Image URL")
47
  generate_button = gr.Button("Generate Paragraph")
48
 
49
- example_1 = gr.Button("Types of Falcons")
50
- example_2 = gr.Button("Camel Racing - Saudi Arabia")
51
- example_3 = gr.Button("Urban Street Scene - India")
52
-
 
53
  with gr.Column():
54
  generated_paragraph_output = gr.Textbox(label="Generated Paragraph")
55
 
56
  generate_button.click(generate_paragraph, inputs=image_url_input, outputs=generated_paragraph_output)
57
 
 
 
58
  example_1.click(
59
  lambda: set_and_generate("https://www.animalspot.net/wp-content/uploads/2020/01/Types-of-Falcons.jpg"),
60
  outputs=[image_url_input, generated_paragraph_output]
61
  )
 
62
  example_2.click(
63
  lambda: set_and_generate("https://www.leaders-mena.com/leaders/uploads/2023/01/The-Traditional-Camel-Racing-In-Saudi-Arabia-Unique-Sport-Activity-1024x576.jpg"),
64
  outputs=[image_url_input, generated_paragraph_output]
65
  )
 
66
  example_3.click(
67
  lambda: set_and_generate("http://embed.robertharding.com/embed/1161-4342.jpg"),
68
  outputs=[image_url_input, generated_paragraph_output]
69
  )
70
 
 
71
  # Launch the Gradio interface
72
  demo.launch()
 
12
  description = """
13
  Falcon2-11B-vlm is an 11B parameters causal decoder-only model built by TII and trained on over 5,000B tokens of RefinedWeb enhanced with curated corpora. To bring vision capabilities, , we integrate the pretrained CLIP ViT-L/14 vision encoder with our Falcon2-11B chat-finetuned model and train with image-text data. For enhancing the VLM's perception of fine-grained details w.r.t small objects in images, we employ a dynamic encoding mechanism at high-resolution for image inputs.
14
 
15
+ ### Join us :
16
+ 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) Math 🔍 [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [MultiTonic](https://github.com/multitonic/)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
17
  """
18
 
19
  processor = LlavaNextProcessor.from_pretrained("tiiuae/falcon-11B-vlm", tokenizer_class='PreTrainedTokenizerFast')
 
32
 
33
  return generated_captions
34
 
 
35
  def set_and_generate(url):
36
  generated_paragraph = generate_paragraph(url)
37
  return url, generated_paragraph
38
 
39
+
40
  with gr.Blocks() as demo:
41
+
42
  gr.Markdown(title)
43
  gr.Markdown(description)
44
 
45
+
46
  with gr.Row():
47
+
48
  with gr.Column():
49
+
50
  image_url_input = gr.Textbox(label="Image URL")
51
  generate_button = gr.Button("Generate Paragraph")
52
 
53
+ example_1 = gr.Image(value="https://www.animalspot.net/wp-content/uploads/2020/01/Types-of-Falcons.jpg", label="Types of Falcons", interactive=True, shape=(150, 150))
54
+ example_2 = gr.Image(value="https://www.leaders-mena.com/leaders/uploads/2023/01/The-Traditional-Camel-Racing-In-Saudi-Arabia-Unique-Sport-Activity-1024x576.jpg", label="Camel Racing - Saudi Arabia", interactive=True, shape=(150, 150))
55
+ example_3 = gr.Image(value="http://embed.robertharding.com/embed/1161-4342.jpg", label="Urban Street Scene - India", interactive=True, shape=(150, 150))
56
+
57
+
58
  with gr.Column():
59
  generated_paragraph_output = gr.Textbox(label="Generated Paragraph")
60
 
61
  generate_button.click(generate_paragraph, inputs=image_url_input, outputs=generated_paragraph_output)
62
 
63
+
64
+
65
  example_1.click(
66
  lambda: set_and_generate("https://www.animalspot.net/wp-content/uploads/2020/01/Types-of-Falcons.jpg"),
67
  outputs=[image_url_input, generated_paragraph_output]
68
  )
69
+
70
  example_2.click(
71
  lambda: set_and_generate("https://www.leaders-mena.com/leaders/uploads/2023/01/The-Traditional-Camel-Racing-In-Saudi-Arabia-Unique-Sport-Activity-1024x576.jpg"),
72
  outputs=[image_url_input, generated_paragraph_output]
73
  )
74
+
75
  example_3.click(
76
  lambda: set_and_generate("http://embed.robertharding.com/embed/1161-4342.jpg"),
77
  outputs=[image_url_input, generated_paragraph_output]
78
  )
79
 
80
+
81
  # Launch the Gradio interface
82
  demo.launch()