Spaces:
Sleeping
Sleeping
krishnapal2308
committed on
Commit
·
7301eb7
1
Parent(s):
4582e37
pipeline to manual
Browse files
- __pycache__/inference_script.cpython-310.pyc +0 -0
- __pycache__/vit_gpt2.cpython-310.pyc +0 -0
- app.py +0 -2
- inference_script.py +2 -2
- test.py +0 -2
- vit_gpt2.py +18 -30
__pycache__/inference_script.cpython-310.pyc
CHANGED
Binary files a/__pycache__/inference_script.cpython-310.pyc and b/__pycache__/inference_script.cpython-310.pyc differ
|
|
__pycache__/vit_gpt2.cpython-310.pyc
CHANGED
Binary files a/__pycache__/vit_gpt2.cpython-310.pyc and b/__pycache__/vit_gpt2.cpython-310.pyc differ
|
|
app.py
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
-
import base64
|
2 |
import tempfile
|
3 |
-
import numpy as np
|
4 |
import gradio as gr
|
5 |
from gtts import gTTS
|
6 |
import inference_script
|
|
|
|
|
1 |
import tempfile
|
|
|
2 |
import gradio as gr
|
3 |
from gtts import gTTS
|
4 |
import inference_script
|
inference_script.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import numpy as np
|
2 |
-
|
3 |
import tensorflow as tf
|
4 |
import keras
|
5 |
from keras.models import Model
|
6 |
-
|
|
|
7 |
|
8 |
class Encoder(Model):
|
9 |
def __init__(self, embed_dim):
|
|
|
1 |
import numpy as np
|
|
|
2 |
import tensorflow as tf
|
3 |
import keras
|
4 |
from keras.models import Model
|
5 |
+
import warnings
|
6 |
+
warnings.filterwarnings('ignore')
|
7 |
|
8 |
class Encoder(Model):
|
9 |
def __init__(self, embed_dim):
|
test.py
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
-
import base64
|
2 |
import tempfile
|
3 |
-
import numpy as np
|
4 |
import gradio as gr
|
5 |
from gtts import gTTS
|
6 |
import inference_script
|
|
|
|
|
1 |
import tempfile
|
|
|
2 |
import gradio as gr
|
3 |
from gtts import gTTS
|
4 |
import inference_script
|
vit_gpt2.py
CHANGED
@@ -1,39 +1,27 @@
|
|
1 |
-
from transformers import
|
2 |
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
|
5 |
def predict_step(img_array):
|
6 |
-
image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
|
7 |
i_image = Image.fromarray(img_array)
|
8 |
|
9 |
if i_image.mode != "RGB":
|
10 |
i_image = i_image.convert(mode="RGB")
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
# feature_extractor = ViTImageProcessor.from_pretrained("vit-gpt2-image-captioning")
|
20 |
-
# tokenizer = AutoTokenizer.from_pretrained("vit-gpt2-image-captioning")
|
21 |
-
#
|
22 |
-
# max_length = 16
|
23 |
-
# num_beams = 4
|
24 |
-
# gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
|
25 |
-
#
|
26 |
-
#
|
27 |
-
# def predict_step(img_array):
|
28 |
-
# i_image = Image.fromarray(img_array)
|
29 |
-
#
|
30 |
-
# if i_image.mode != "RGB":
|
31 |
-
# i_image = i_image.convert(mode="RGB")
|
32 |
-
#
|
33 |
-
# pixel_values = feature_extractor(images=i_image, return_tensors="pt", do_normalize=True).pixel_values
|
34 |
-
#
|
35 |
-
# output_ids = model.generate(pixel_values, **gen_kwargs)
|
36 |
-
#
|
37 |
-
# pred = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
38 |
-
# pred = [p.strip() for p in pred]
|
39 |
-
# return pred
|
|
|
1 |
+
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
|
2 |
from PIL import Image
|
3 |
+
import warnings
|
4 |
+
warnings.filterwarnings('ignore')
|
5 |
+
|
6 |
+
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
|
7 |
+
feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
|
8 |
+
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
|
9 |
+
|
10 |
+
max_length = 16
|
11 |
+
num_beams = 4
|
12 |
+
gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
|
13 |
|
14 |
|
15 |
def predict_step(img_array):
|
|
|
16 |
i_image = Image.fromarray(img_array)
|
17 |
|
18 |
if i_image.mode != "RGB":
|
19 |
i_image = i_image.convert(mode="RGB")
|
20 |
|
21 |
+
pixel_values = feature_extractor(images=i_image, return_tensors="pt", do_normalize=True).pixel_values
|
22 |
+
|
23 |
+
output_ids = model.generate(pixel_values, **gen_kwargs)
|
24 |
+
|
25 |
+
pred = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
26 |
+
pred = [p.strip() for p in pred]
|
27 |
+
return pred
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|