"""Gradio demo that runs a YOLOv10 drug detector on an image or a video.

At import time the module reads ``image_class.csv`` and builds two lookup
tables (class id -> name, and image-file key -> ground-truth name/class),
then assembles the Gradio UI. Running the file as a script launches the app.
"""

import os
import shutil
import tempfile

import cv2
import gradio as gr
import pandas as pd
from gradio import processing_utils
from ultralytics import YOLOv10

# Weights used for inference ('./pills_yolov10.pt' is an alternative
# checkpoint that was referenced in an earlier revision of this file).
MODEL_PATH = './drug_yolov10.pt'
CLASS_TABLE_PATH = 'image_class.csv'
# Shown in the "Drug Name (Ground Truth)" box when no unique match exists.
NO_DATA_LABEL = 'No have data'


def _file_key(path):
    """Return the lookup key of an image file.

    The key is the last underscore-separated token of the file name with
    everything from the first dot onward removed. The directory part is
    dropped first so that underscores or dots in a directory name cannot
    leak into the key.
    """
    return os.path.basename(path).split('_')[-1].split('.')[0]


_class_table = pd.read_csv(CLASS_TABLE_PATH)

# Unique (name, class) pairs: maps a predicted class id to a drug name.
df = _class_table[['name', 'class']].drop_duplicates()
print(len(df))

# Unique (name, class, file_name) rows: maps an image-file key to its
# ground-truth name and class. `.copy()` avoids assigning into a slice.
df1 = _class_table[['name', 'class']].copy()
df1['file_name'] = _class_table['im_file'].apply(_file_key)
df1 = df1.drop_duplicates()
print(df1)
print(len(df1))


def _class_name(class_id):
    """Return the drug name(s) registered for ``class_id`` in ``df``.

    Several names for one class are joined with ' / '; an id missing from
    the table yields a placeholder instead of raising.
    """
    names = df.loc[df['class'] == class_id, 'name']
    if names.empty:
        return f'Unknown class {class_id}'
    return ' / '.join(str(name) for name in names)


def _ground_truth_label(file_name):
    """Return '<class>, <name>' for ``file_name``, or the no-data label.

    The label is only produced when exactly one row of ``df1`` matches;
    zero or multiple matches both give ``NO_DATA_LABEL``.
    """
    matches = df1.loc[df1['file_name'] == file_name]
    if len(matches) != 1:
        return NO_DATA_LABEL
    drug_name = f"{matches['class'].item()}, {matches['name'].item()}"
    print(drug_name)
    return drug_name


def _new_temp_path(suffix):
    """Create an empty temporary file and return its path.

    Uses ``mkstemp`` (the file is created atomically) rather than the
    deprecated ``mktemp``; the caller owns the file and its cleanup.
    """
    handle, path = tempfile.mkstemp(suffix=suffix)
    os.close(handle)
    return path


def _annotate_video(model, video, image_size, conf_threshold, iou_threshold):
    """Run detection on every frame of ``video`` and return the output path.

    The input is first copied to a temporary ``.webm`` file which is read
    frame by frame; annotated frames are written to a second temporary
    ``.webm`` file encoded with the 'vp80' fourcc. The input copy is deleted
    afterwards; the output file is left for the caller.
    """
    input_path = _new_temp_path('.webm')
    output_path = _new_temp_path('.webm')
    cap = None
    out = None
    try:
        shutil.copyfile(video, input_path)
        cap = cv2.VideoCapture(input_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )
        out = cv2.VideoWriter(
            output_path, cv2.VideoWriter_fourcc(*'vp80'), fps, frame_size
        )
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            results = model.predict(
                source=frame,
                imgsz=image_size,
                conf=conf_threshold,
                iou=iou_threshold,
            )
            out.write(results[0].plot())
    finally:
        # Release the capture/writer even when inference fails midway.
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()
        try:
            os.remove(input_path)
        except OSError:
            # Best-effort cleanup; a leftover temp file is not fatal.
            pass
    return output_path


def yolov10_inference(image, video, image_size, conf_threshold, iou_threshold):
    """Run the detector on an image (preferred) or on a video.

    Args:
        image: Path of the input image, or a falsy value to use ``video``.
        video: Path of the input video; only used when ``image`` is falsy.
        image_size: Inference size passed to the model as ``imgsz``.
        conf_threshold: Confidence threshold passed as ``conf``.
        iou_threshold: IoU threshold passed as ``iou``.

    Returns:
        A 6-tuple ``(annotated_image, annotated_video_path, class_summary,
        predicted_names, file_name, ground_truth)`` in the order of the UI
        outputs. For an image the video slot is ``None``; for a video every
        slot except the video path is ``None``.

    Raises:
        gr.Error: If neither an image nor a video is supplied.
    """
    if not image and not video:
        raise gr.Error('Please provide an image or a video.')

    # NOTE(review): the model is re-loaded on every call, as before; consider
    # caching it if latency matters and sharing one instance is safe.
    model = YOLOv10(MODEL_PATH)

    if not image:
        output_video_path = _annotate_video(
            model, video, image_size, conf_threshold, iou_threshold
        )
        return None, output_video_path, None, None, None, None

    results = model.predict(
        source=image, imgsz=image_size, conf=conf_threshold, iou=iou_threshold
    )
    annotated_image = results[0].plot()

    boxes = results[0].boxes
    class_ids = [int(c) for c in boxes.cls.tolist()]
    confidences = boxes.conf.tolist()
    # `:.0%` prints a whole-number percentage without float artefacts.
    class_summary = '\n'.join(
        f'Class:{class_id} , Confidence:{confidence:.0%}'
        for class_id, confidence in zip(class_ids, confidences)
    )
    predicted_names = '\n'.join(_class_name(class_id) for class_id in class_ids)

    file_name = _file_key(image)
    print(f'file name: {file_name}')
    drug_name = _ground_truth_label(file_name)

    # Reverse the channel axis of the plotted array before handing it to the
    # UI (presumably BGR -> RGB; confirm against the plotting API).
    return (
        annotated_image[:, :, ::-1],
        None,
        class_summary,
        predicted_names,
        file_name,
        drug_name,
    )


def yolov10_inference_for_examples(image, image_size, conf_threshold, iou_threshold):
    """Run image inference for ``gr.Examples`` and return only the image."""
    return yolov10_inference(
        image, None, image_size, conf_threshold, iou_threshold
    )[0]


def app():
    """Build the detection UI: inputs, outputs, event wiring and examples."""
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                image = gr.Image(type="filepath", label="Image", visible=True)
                video = gr.Video(label="Video", visible=False)
                input_type = gr.Radio(
                    choices=["Image", "Video"],
                    value="Image",
                    label="Input Type",
                )
                file_name = gr.Textbox(label='File Name')
                drug_name = gr.Textbox(label='Drug Name (Ground Truth)')
                image_size = gr.Slider(
                    label="Image Size",
                    # A size of 0 cannot be used for inference, so the range
                    # starts at the first step instead.
                    minimum=10,
                    maximum=1280,
                    step=10,
                    value=640,
                )
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.25,
                )
                iou_threshold = gr.Slider(
                    label="IOU Threshold",
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=0.6,
                )
                yolov10_infer = gr.Button(value="Detect Objects")

            with gr.Column():
                output_image = gr.Image(
                    type="numpy", label="Annotated Image", visible=True
                )
                output_video = gr.Video(label="Annotated Video", visible=False)
                output_name = gr.Textbox(label='Predicted Drug Name')
                output_class = gr.Textbox(label='Predicted Class')

        def update_visibility(input_type):
            """Show the image widgets or the video widgets, never both."""
            show_image = input_type == "Image"
            image = gr.update(visible=show_image)
            video = gr.update(visible=not show_image)
            output_image = gr.update(visible=show_image)
            output_video = gr.update(visible=not show_image)
            print(f'updated image: {image}')
            return image, video, output_image, output_video

        input_type.change(
            fn=update_visibility,
            inputs=[input_type],
            outputs=[image, video, output_image, output_video],
        )

        def run_inference(image, video, image_size, conf_threshold, iou_threshold, input_type):
            """Dispatch to image or video inference based on the radio value."""
            if input_type == "Image":
                return yolov10_inference(
                    image, None, image_size, conf_threshold, iou_threshold
                )
            return yolov10_inference(
                None, video, image_size, conf_threshold, iou_threshold
            )

        yolov10_infer.click(
            fn=run_inference,
            inputs=[image, video, image_size, conf_threshold, iou_threshold, input_type],
            outputs=[output_image, output_video, output_class, output_name, file_name, drug_name],
        )

        gr.Examples(
            examples=[
                ['./RXBASE-600_00071-1014-68_NLMIMAGE10_5715ABFD.jpg', 280, 0.2, 0.6],
                ['./RXNAV-600_13668-0095-90_RXNAVIMAGE10_D145E8EF.jpg', 640, 0.2, 0.7],
                ['./RXBASE-600_00074-7126-13_NLMIMAGE10_C003606B.jpg', 640, 0.2, 0.8],
            ],
            fn=yolov10_inference_for_examples,
            inputs=[
                image,
                image_size,
                conf_threshold,
                iou_threshold,
            ],
            outputs=[output_image],
            cache_examples='lazy',
        )


gradio_app = gr.Blocks()
with gradio_app:
    gr.HTML(
        """

YOLOv10: Real-Time End-to-End Object Detection

""")
    gr.HTML(
        """

arXiv | github

""")
    with gr.Row():
        with gr.Column():
            app()

if __name__ == '__main__':
    gradio_app.launch()