Update app.py
app.py CHANGED
@@ -21,21 +21,6 @@ vit_feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-i
 
 tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 
-#url = 'https://d2gp644kobdlm6.cloudfront.net/wp-content/uploads/2016/06/bigstock-Shocked-and-surprised-boy-on-t-113798588-300x212.jpg'
-
-#with Image.open(requests.get(url, stream=True).raw) as img:
-# pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
-
-#encoder_outputs = model.generate(pixel_values.to('cpu'),num_beams=5)
-
-#generated_sentences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True)
-
-#generated_sentences
-
-#naive text processing
-#generated_sentences[0].split('.')[0]
-
-# inference function
 
 def vit2distilgpt2(img):
   pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
@@ -44,7 +29,6 @@ def vit2distilgpt2(img):
 
   return(generated_sentences[0].split('.')[0])
 
-#!wget https://media.glamour.com/photos/5f171c4fd35176eaedb36823/master/w_2560%2Cc_limit/bike.jpg
 
 import gradio as gr
 
@@ -56,11 +40,11 @@ outputs = [
   gr.outputs.Textbox(label = 'Caption')
 ]
 
-title = "Image Captioning
+title = "Image Captioning with Visual Transformer using nlpconnect"
 description = "ViT and GPT2 are used to generate Image Caption for the uploaded image. COCO Dataset was used for training."
 article = " <a href='https://huggingface.co/nlpconnect/vit-gpt2-image-captioning'>Model Repo on Hugging Face Model Hub</a>"
 examples = [
-  ["
+  ["Img_3.jpg"],
   ["Img_1.jpg"],
   ["Img_2.jpg"]
 ]
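
Taken together, the hunks above leave an app.py roughly like the sketch below. The parts not visible in the diff are filled in as assumptions: the VisionEncoderDecoderModel loading line, the generate/batch_decode steps inside vit2distilgpt2 (inferred from the commented-out exploration code this commit deletes), the gr.inputs.Image input component, and the gr.Interface(...).launch() wiring.

# Sketch of app.py after this commit; see the assumptions noted above.
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
import gradio as gr

# Assumed: the model itself is loaded from the same checkpoint as the
# processor and tokenizer that do appear in the diff context.
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
vit_feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

def vit2distilgpt2(img):
    # Turn the uploaded PIL image into pixel values for the ViT encoder.
    pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
    # Inferred from the commented-out exploration code removed above:
    # beam search on CPU, then decode the generated token ids back to text.
    encoder_outputs = model.generate(pixel_values.to("cpu"), num_beams=5)
    generated_sentences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True)
    # Naive post-processing: keep only the first sentence of the caption.
    return generated_sentences[0].split(".")[0]

# Assumed: an image input component and the Interface wiring, using the same
# legacy gr.inputs/gr.outputs API as the outputs list visible in the diff.
inputs = [gr.inputs.Image(type="pil", label="Image")]
outputs = [gr.outputs.Textbox(label="Caption")]

title = "Image Captioning with Visual Transformer using nlpconnect"
description = "ViT and GPT2 are used to generate Image Caption for the uploaded image. COCO Dataset was used for training."
article = " <a href='https://huggingface.co/nlpconnect/vit-gpt2-image-captioning'>Model Repo on Hugging Face Model Hub</a>"
examples = [["Img_3.jpg"], ["Img_1.jpg"], ["Img_2.jpg"]]

gr.Interface(
    fn=vit2distilgpt2,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch()

On CPU, beam search with num_beams=5 dominates the latency; the split(".")[0] at the end is just a naive way to trim the generated caption to its first sentence.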