jarif committed on
Commit
113b8e7
·
verified ·
1 Parent(s): 1c9ef1e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +47 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
3
+ import torch
4
+ from PIL import Image
5
+ import matplotlib.pyplot as plt
6
+
7
# Checkpoint shared by the captioning model, its image processor and its tokenizer
_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
feature_extractor = ViTImageProcessor.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

# Use the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Generation settings forwarded to model.generate()
max_length = 16
num_beams = 4
gen_kwargs = dict(max_length=max_length, num_beams=num_beams)
19
+
20
def predict_step(image):
    """Generate a caption for a single image.

    Args:
        image: The image as a NumPy array (the interface requests
            ``type="numpy"``), or ``None`` when no image was supplied.

    Returns:
        The first decoded caption with surrounding whitespace stripped, or a
        short prompt asking for an image when ``image`` is ``None``.
    """
    # Submitting the form without an image yields None; Image.fromarray
    # cannot handle that, so answer with a hint instead of crashing.
    if image is None:
        return "Please upload an image."

    # Process the input image; anything that is not RGB is converted
    i_image = Image.fromarray(image)
    if i_image.mode != "RGB":
        i_image = i_image.convert(mode="RGB")

    # Prepare image for the model (batch of one) and move it to the model's device
    pixel_values = feature_extractor(images=[i_image], return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)

    # Generate prediction and decode it back to text
    output_ids = model.generate(pixel_values, **gen_kwargs)
    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)

    # Only one image was submitted, so only the first caption is relevant
    return preds[0].strip()
36
+
37
# Define Gradio interface
# The image input is built with gr.Image: the legacy gr.inputs namespace
# (gr.inputs.Image) was deprecated in Gradio 3 and removed in Gradio 4.
interface = gr.Interface(
    fn=predict_step,
    inputs=gr.Image(type="numpy"),
    outputs="text",
    title="Image-to-Text Conversion",
    description="Upload an image, and this model will generate a textual description of the image.",
)

# Launch the interface
interface.launch()
requirements.txt ADDED
Binary file (2.49 kB). View file