import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
from PIL import Image
from decord import VideoReader, cpu
import spaces
# Load the MiniCPM-V-2.6 model and tokenizer (this checkpoint requires remote code)
model_path = 'openbmb/MiniCPM-V-2_6'
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)
model = model.to(device='cuda')
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model.eval()

# Cap the number of frames fed to the model per video
MAX_NUM_FRAMES = 64
def encode_image(image):
    # Accept either a PIL image or a file path, and cap the longest side
    # so oversized frames do not inflate the visual token count.
    if not isinstance(image, Image.Image):
        image = Image.open(image).convert("RGB")
    max_size = 448 * 16
    if max(image.size) > max_size:
        w, h = image.size
        if w > h:
            new_w = max_size
            new_h = int(h * max_size / w)
        else:
            new_h = max_size
            new_w = int(w * max_size / h)
        image = image.resize((new_w, new_h), resample=Image.BICUBIC)
    return image
def encode_video(video_path):
    # Sample roughly one frame per second of video, capped at MAX_NUM_FRAMES.
    vr = VideoReader(video_path, ctx=cpu(0))
    sample_fps = max(round(vr.get_avg_fps()), 1)  # guard against a zero step for very low-fps clips
    frame_idx = list(range(0, len(vr), sample_fps))
    if len(frame_idx) > MAX_NUM_FRAMES:
        frame_idx = frame_idx[:MAX_NUM_FRAMES]
    frames = vr.get_batch(frame_idx).asnumpy()
    frames = [Image.fromarray(f.astype('uint8')) for f in frames]
    return [encode_image(f) for f in frames]
@spaces.GPU  # request a GPU for this call on ZeroGPU Spaces; without it the CUDA model cannot run there
def analyze_video(prompt, video):
    # gr.Video may pass a file path or a file-like object depending on the Gradio version.
    if isinstance(video, str):
        video_path = video
    else:
        video_path = video.name
    encoded_video = encode_video(video_path)
    # MiniCPM-V chat format: a single user turn whose content mixes text and PIL frames.
    context = [
        {"role": "user", "content": [prompt] + encoded_video}
    ]
    params = {
        'sampling': True,
        'top_p': 0.8,
        'top_k': 100,
        'temperature': 0.7,
        'repetition_penalty': 1.05,
        "max_new_tokens": 2048,
        "max_inp_length": 4352,
        "use_image_id": False,
        # Use fewer image slices for long videos to stay within the context window.
        "max_slice_nums": 1 if len(encoded_video) > 16 else 2
    }
    response = model.chat(image=None, msgs=context, tokenizer=tokenizer, **params)
    return response
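# Example usage outside the UI (hypothetical: 'sample.mp4' is a placeholder path,
# not a file shipped with this Space):
#   result = analyze_video("Describe what happens in this clip.", "sample.mp4")
#   print(result)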
with gr.Blocks() as demo:
    gr.Markdown("# Video Analyzer")
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt")
            video_input = gr.Video(label="Upload Video")
        with gr.Column():
            output = gr.Textbox(label="Analysis Result")
            analyze_button = gr.Button("Analyze Video")
    analyze_button.click(fn=analyze_video, inputs=[prompt_input, video_input], outputs=output)

demo.launch()
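# Optional: demo.queue().launch() would route requests through Gradio's built-in
# queue, which is generally preferable for long-running video inference on Spaces.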