parasmech committed on
Commit
746dc09
1 Parent(s): 6e65235

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -18
app.py CHANGED
@@ -21,21 +21,6 @@ vit_feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-i
21
 
22
  tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
23
 
24
- #url = 'https://d2gp644kobdlm6.cloudfront.net/wp-content/uploads/2016/06/bigstock-Shocked-and-surprised-boy-on-t-113798588-300x212.jpg'
25
-
26
- #with Image.open(requests.get(url, stream=True).raw) as img:
27
- # pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
28
-
29
- #encoder_outputs = model.generate(pixel_values.to('cpu'),num_beams=5)
30
-
31
- #generated_sentences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True)
32
-
33
- #generated_sentences
34
-
35
- #naive text processing
36
- #generated_sentences[0].split('.')[0]
37
-
38
- # inference function
39
 
40
  def vit2distilgpt2(img):
41
  pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
@@ -44,7 +29,6 @@ def vit2distilgpt2(img):
44
 
45
  return(generated_sentences[0].split('.')[0])
46
 
47
- #!wget https://media.glamour.com/photos/5f171c4fd35176eaedb36823/master/w_2560%2Cc_limit/bike.jpg
48
 
49
  import gradio as gr
50
 
@@ -56,11 +40,11 @@ outputs = [
56
  gr.outputs.Textbox(label = 'Caption')
57
  ]
58
 
59
- title = "Image Captioning using ViT + GPT2 + nlpconnect"
60
  description = "ViT and GPT2 are used to generate Image Caption for the uploaded image. COCO Dataset was used for training."
61
  article = " <a href='https://huggingface.co/nlpconnect/vit-gpt2-image-captioning'>Model Repo on Hugging Face Model Hub</a>"
62
  examples = [
63
- ["people-walking-street-pedestrian-crossing-traffic-light-city.jpeg"],
64
  ["Img_1.jpg"],
65
  ["Img_2.jpg"]
66
  ]
 
21
 
22
  tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def vit2distilgpt2(img):
26
  pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
 
29
 
30
  return(generated_sentences[0].split('.')[0])
31
 
 
32
 
33
  import gradio as gr
34
 
 
40
  gr.outputs.Textbox(label = 'Caption')
41
  ]
42
 
43
+ title = "Image Captioning with Visual Transformer using nlpconnect"
44
  description = "ViT and GPT2 are used to generate Image Caption for the uploaded image. COCO Dataset was used for training."
45
  article = " <a href='https://huggingface.co/nlpconnect/vit-gpt2-image-captioning'>Model Repo on Hugging Face Model Hub</a>"
46
  examples = [
47
+ ["Img_3.jpg"],
48
  ["Img_1.jpg"],
49
  ["Img_2.jpg"]
50
  ]