Spaces:
Runtime error
Runtime error
sunny-annie
committed on
Commit
•
e24264c
1
Parent(s):
fab7731
Upload 4 files
Browse files- feature_extractor_v3.joblib +3 -0
- img-2-txt.py +55 -0
- model_weights_i2t_fin.pt +3 -0
- tokenizer_v3.joblib +3 -0
feature_extractor_v3.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6de31b46e55b824d28a5daab95de36f12f7cee1600bda97c7496433415c425c0
|
3 |
+
size 361
|
img-2-txt.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import torch
|
3 |
+
from joblib import load
|
4 |
+
from PIL import Image
|
5 |
+
from transformers import VisionEncoderDecoderModel
|
6 |
+
|
7 |
+
device = 'cpu'
|
8 |
+
|
9 |
+
# tokenizer = load("./pages/tokenizer_v3.joblib")
|
10 |
+
# feature_extractor = load("./pages/feature_extractor_v3.joblib")
|
11 |
+
tokenizer = load("tokenizer_v3.joblib")
|
12 |
+
feature_extractor = load("feature_extractor_v3.joblib")
|
13 |
+
|
14 |
+
model = VisionEncoderDecoderModel.from_pretrained("dumperize/movie-picture-captioning")
|
15 |
+
# model = load("model_img2txt_v3.joblib")
|
16 |
+
model.load_state_dict(torch.load("model_weights_i2t_fin.pt", map_location=torch.device('cpu')))
|
17 |
+
# model.eval()
|
18 |
+
|
19 |
+
max_length = 512
|
20 |
+
min_length = 32
|
21 |
+
num_beams = 7
|
22 |
+
gen_kwargs = {"max_length": max_length, "min_length": min_length, "num_beams": num_beams}
|
23 |
+
|
24 |
+
|
25 |
+
|
26 |
+
uploaded_file = st.file_uploader("Выберите изображение обложки книги в формате jpeg или jpg...", type=["jpg", "jpeg"])
|
27 |
+
if uploaded_file is not None:
|
28 |
+
image = Image.open(uploaded_file)
|
29 |
+
st.image(image, caption='Загруженное изображение')
|
30 |
+
image = image.resize([224,224])
|
31 |
+
if image.mode != "RGB":
|
32 |
+
image = image.convert(mode="RGB")
|
33 |
+
|
34 |
+
pixel_values = feature_extractor(images=[image], return_tensors="pt").pixel_values
|
35 |
+
pixel_values = pixel_values.to(device)
|
36 |
+
|
37 |
+
output_ids = model.generate(pixel_values, **gen_kwargs)
|
38 |
+
|
39 |
+
preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
40 |
+
preds = [pred.strip() for pred in preds]
|
41 |
+
st.write(preds[0])
|
42 |
+
|
43 |
+
|
44 |
+
# image = Image.open(image_path)
|
45 |
+
# image = image.resize([224,224])
|
46 |
+
# if image.mode != "RGB":
|
47 |
+
# image = image.convert(mode="RGB")
|
48 |
+
|
49 |
+
# pixel_values = feature_extractor(images=[image], return_tensors="pt").pixel_values
|
50 |
+
# pixel_values = pixel_values.to(device)
|
51 |
+
|
52 |
+
# output_ids = model.generate(pixel_values, **gen_kwargs)
|
53 |
+
|
54 |
+
# preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
55 |
+
# print([pred.strip() for pred in preds])
|
model_weights_i2t_fin.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6753b600e5d753ad7420a92aa4dcd3f5d860f8e9b5c933f059144e4e579d938e
|
3 |
+
size 1171154606
|
tokenizer_v3.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cc0b03a6f6232c82c5d9afa89f8d53f71d1b324e9b3845869d4871fa0ebe87d
|
3 |
+
size 2617590
|