# OpenLenda / app.py
# Gradio demo for the OpenLenda detector (YOLOX-based): image and video tabs.
import subprocess
import tempfile
import time
from pathlib import Path

import cv2
import gradio as gr
import torch

from yolox.data.datasets import COCO_CLASSES
from yolox.exp import get_exp

from predictor import Predictor
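
# Build the YOLOX experiment from the OpenLenda-S config, put the model in
# eval mode, and load the released checkpoint on CPU.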
exp = get_exp("exps/openlenda_s.py", None)
model = exp.get_model()
model.eval()
ckpt_file = "models/openlenda_s.pth"
model.load_state_dict(torch.load(ckpt_file, map_location="cpu")["model"])
predictor = Predictor(
    model, COCO_CLASSES, "cpu", False, False  # device "cpu"; the two flags are likely fp16 / legacy preprocessing
)


def image_inference(image, confthre, nmsthre):
    # Gradio delivers the image as an RGB array; the model expects OpenCV's BGR order.
    cv2.cvtColor(image, cv2.COLOR_RGB2BGR, image)  # in-place channel swap
    outputs, img_info = predictor.inference(image, confthre, nmsthre)
    result_image = predictor.visual(outputs[0], img_info)
    # Swap back to RGB so the result displays correctly in the browser.
    cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB, result_image)
    return result_image


image_interface = gr.Interface(
    fn=image_inference,
    inputs=[
        "image",
        gr.Slider(0, 1, value=0.5, step=0.01, label="Confidence Threshold"),
        gr.Slider(0, 1, value=0.01, step=0.01, label="NMS Threshold"),
    ],
    examples=[["assets/sample.png", 0.5, 0.01]],
    outputs=gr.Image(type="numpy"),  # image_inference returns a NumPy array, not a PIL image
    title="OpenLenda image demo",
)
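
# To smoke-test just the image demo locally, this interface can be launched on
# its own (the Space itself launches the tabbed app at the bottom of the file):
#   image_interface.launch()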


def video_inference(video_file, confthre, nmsthre, start_sec, duration):
    # Cut the requested clip out of the upload with ffmpeg: -ss before -i seeks
    # to the nearest keyframe before start_sec, and -t caps the stream copy at
    # `duration` seconds. Arguments are passed as a list so paths with spaces
    # survive intact.
    start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
    suffix = Path(video_file).suffix
    clip_temp_file = tempfile.NamedTemporaryFile(suffix=suffix)
    subprocess.call(
        ["ffmpeg", "-y", "-ss", start_timestamp, "-i", video_file,
         "-t", str(duration), "-c", "copy", clip_temp_file.name]
    )

    cap = cv2.VideoCapture(clip_temp_file.name)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_file:
        out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
        num_frames = 0
        max_frames = duration * fps
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Run detection on the BGR frame and draw the predicted boxes.
            outputs, img_info = predictor.inference(frame, confthre, nmsthre)
            result_frame = predictor.visual(outputs[0], img_info)
            out.write(result_frame)
            num_frames += 1
            if num_frames >= max_frames:  # fps is a float, so an exact == test could never fire
                break
        cap.release()
        out.release()

        # Re-encode with H.264 so the result is playable in the browser.
        out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False)
        subprocess.run(
            ["ffmpeg", "-y", "-loglevel", "quiet", "-stats",
             "-i", temp_file.name, "-c:v", "libx264", out_file.name]
        )
    return out_file.name
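
# For a quick offline check, the function can be called directly, e.g. with a
# hypothetical local clip (illustrative path, not shipped with the repo):
#   print(video_inference("assets/sample.mp4", 0.5, 0.01, start_sec=0, duration=3))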


video_interface = gr.Interface(
    fn=video_inference,
    inputs=[
        gr.Video(),
        gr.Slider(0, 1, value=0.5, step=0.01, label="Confidence Threshold"),
        gr.Slider(0, 1, value=0.01, step=0.01, label="NMS Threshold"),
        gr.Slider(0, 60, value=0, step=1, label="Start Second"),
        gr.Slider(0, 10, value=3, step=1, label="Duration"),
    ],
    outputs=gr.Video(),
    title="OpenLenda video demo",
)
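
# Serve both demos as tabs of a single Gradio app.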
if __name__ == "__main__":
gr.TabbedInterface(
[image_interface, video_interface],
["Image", "Video"],
title="OpenLenda demo!",
).launch()