DawnC committed on
Commit
1044c24
1 Parent(s): 6e4ac01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -83
app.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
6
  from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
7
  import torch.nn.functional as F
8
  from torchvision import transforms
9
- from PIL import Image, ImageDraw, ImageFont
10
  from data_manager import get_dog_description
11
  from urllib.parse import quote
12
  from ultralytics import YOLO
@@ -312,7 +312,7 @@ def _predict_single_dog(image):
312
  # return dogs
313
  # 此為如果後面調不好 使用的版本
314
 
315
- async def detect_multiple_dogs(image, conf_threshold=0.2, iou_threshold=0.3):
316
  results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
317
  dogs = []
318
  for box in results.boxes:
@@ -321,7 +321,7 @@ async def detect_multiple_dogs(image, conf_threshold=0.2, iou_threshold=0.3):
321
  confidence = box.conf.item()
322
  area = (xyxy[2] - xyxy[0]) * (xyxy[3] - xyxy[1])
323
  image_area = image.width * image.height
324
- if area > 0.01 * image_area: # 過濾掉太小的檢測框,但使用相對面積
325
  cropped_image = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
326
  dogs.append((cropped_image, confidence, xyxy))
327
 
@@ -334,7 +334,7 @@ async def detect_multiple_dogs(image, conf_threshold=0.2, iou_threshold=0.3):
334
  confidence = box.conf.item()
335
  area = (xyxy[2] - xyxy[0]) * (xyxy[3] - xyxy[1])
336
  image_area = image.width * image.height
337
- if area > 0.01 * image_area and not is_box_duplicate(xyxy, [d[2] for d in dogs]):
338
  cropped_image = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
339
  dogs.append((cropped_image, confidence, xyxy))
340
 
@@ -347,7 +347,6 @@ def is_box_duplicate(new_box, existing_boxes, iou_threshold=0.5):
347
  return False
348
 
349
  def calculate_iou(box1, box2):
350
- # 計算兩個邊界框的交集面積
351
  x1 = max(box1[0], box2[0])
352
  y1 = max(box1[1], box2[1])
353
  x2 = min(box1[2], box2[2])
@@ -473,7 +472,6 @@ def calculate_iou(box1, box2):
473
  # if __name__ == "__main__":
474
  # iface.launch()
475
 
476
-
477
  async def predict(image):
478
  if image is None:
479
  return "Please upload an image to start.", None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
@@ -482,65 +480,15 @@ async def predict(image):
482
  if isinstance(image, np.ndarray):
483
  image = Image.fromarray(image)
484
 
485
- dogs = await detect_multiple_dogs(image, conf_threshold=0.3, iou_threshold=0.5)
486
-
487
- if len(dogs) == 0:
 
 
488
  return await process_single_dog(image)
489
- elif len(dogs) == 1:
490
- # 如果只檢測到一隻狗,但圖像可能包含多隻狗,再次嘗試檢測
491
- if has_multiple_dogs(image):
492
- dogs = await detect_multiple_dogs(image, conf_threshold=0.1, iou_threshold=0.2)
493
- if len(dogs) == 1:
494
- return await process_single_dog(dogs[0][0])
495
  else:
496
- # 多狗情境
497
- color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
498
- explanations = []
499
- buttons = []
500
- annotated_image = image.copy()
501
- draw = ImageDraw.Draw(annotated_image)
502
- font = ImageFont.load_default()
503
-
504
- for i, (cropped_image, confidence, box) in enumerate(dogs):
505
- top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
506
- color = color_list[i % len(color_list)]
507
- draw.rectangle(box, outline=color, width=3)
508
- draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)
509
-
510
- breed = topk_breeds[0]
511
- if top1_prob >= 0.5:
512
- description = get_dog_description(breed)
513
- formatted_description = format_description(description, breed)
514
- explanations.append(f"Dog {i+1}: {formatted_description}")
515
- elif top1_prob >= 0.2:
516
- dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
517
- dog_explanation += "\n".join([f"{j+1}. **{breed}** ({prob} confidence)" for j, (breed, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))])
518
- explanations.append(dog_explanation)
519
- buttons.extend([gr.update(visible=True, value=f"Dog {i+1}: More about {breed}") for breed in topk_breeds[:3]])
520
- else:
521
- explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")
522
-
523
- final_explanation = "\n\n".join(explanations)
524
- if buttons:
525
- final_explanation += "\n\nClick on a button to view more information about the breed."
526
- initial_state = {
527
- "explanation": final_explanation,
528
- "buttons": buttons,
529
- "show_back": True
530
- }
531
- return (final_explanation, annotated_image,
532
- buttons[0] if len(buttons) > 0 else gr.update(visible=False),
533
- buttons[1] if len(buttons) > 1 else gr.update(visible=False),
534
- buttons[2] if len(buttons) > 2 else gr.update(visible=False),
535
- gr.update(visible=True),
536
- initial_state)
537
- else:
538
- initial_state = {
539
- "explanation": final_explanation,
540
- "buttons": [],
541
- "show_back": False
542
- }
543
- return final_explanation, annotated_image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
544
 
545
  except Exception as e:
546
  error_msg = f"An error occurred: {str(e)}"
@@ -548,23 +496,14 @@ async def predict(image):
548
  return error_msg, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
549
 
550
  def has_multiple_dogs(image):
551
- # 使用簡單的啟發式方法來檢查圖像是否可能包含多隻狗
552
- # 這裡可以使用更複雜的方法,如特徵提取或輕量級模型
553
  gray = image.convert('L')
554
  edges = gray.filter(ImageFilter.FIND_EDGES)
555
  edge_pixels = np.array(edges)
556
- return np.sum(edge_pixels > 128) > image.width * image.height * 0.1 # 假設邊緣像素比例大於 10% 表示可能有多隻狗
557
 
558
  async def process_single_dog(image):
559
  top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(image)
560
- if top1_prob < 0.2:
561
- initial_state = {
562
- "explanation": "The image is unclear or the breed is not in the dataset. Please upload a clearer image of a dog.",
563
- "buttons": [],
564
- "show_back": False
565
- }
566
- return initial_state["explanation"], None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
567
-
568
  breed = topk_breeds[0]
569
  description = get_dog_description(breed)
570
 
@@ -576,7 +515,7 @@ async def process_single_dog(image):
576
  "show_back": False
577
  }
578
  return formatted_description, image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
579
- else:
580
  explanation = (
581
  f"The model couldn't confidently identify the breed. Here are the top 3 possible breeds:\n\n"
582
  f"1. **{topk_breeds[0]}** ({topk_probs_percent[0]} confidence)\n"
@@ -595,20 +534,74 @@ async def process_single_dog(image):
595
  "show_back": True
596
  }
597
  return explanation, image, buttons[0], buttons[1], buttons[2], gr.update(visible=True), initial_state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
 
599
- def show_details(choice, previous_output, initial_state):
600
  if not choice:
601
- return previous_output, gr.update(visible=True), initial_state
602
 
603
  try:
604
- breed = choice.split("More about ")[-1]
605
  description = get_dog_description(breed)
606
- formatted_description = format_description(description, breed)
607
- return formatted_description, gr.update(visible=True), initial_state
608
  except Exception as e:
609
- error_msg = f"An error occurred while showing details: {e}"
610
- print(error_msg) # 添加日誌輸出
611
- return error_msg, gr.update(visible=True), initial_state
612
 
613
  # 介面部分
614
  with gr.Blocks() as iface:
 
6
  from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
7
  import torch.nn.functional as F
8
  from torchvision import transforms
9
+ from PIL import Image, ImageDraw, ImageFont, ImageFilter
10
  from data_manager import get_dog_description
11
  from urllib.parse import quote
12
  from ultralytics import YOLO
 
312
  # return dogs
313
  # 此為如果後面調不好 使用的版本
314
 
315
+ async def detect_multiple_dogs(image, conf_threshold=0.1, iou_threshold=0.3):
316
  results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
317
  dogs = []
318
  for box in results.boxes:
 
321
  confidence = box.conf.item()
322
  area = (xyxy[2] - xyxy[0]) * (xyxy[3] - xyxy[1])
323
  image_area = image.width * image.height
324
+ if area > 0.005 * image_area: # 降低面積閾值以檢測更多狗
325
  cropped_image = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
326
  dogs.append((cropped_image, confidence, xyxy))
327
 
 
334
  confidence = box.conf.item()
335
  area = (xyxy[2] - xyxy[0]) * (xyxy[3] - xyxy[1])
336
  image_area = image.width * image.height
337
+ if area > 0.005 * image_area and not is_box_duplicate(xyxy, [d[2] for d in dogs]):
338
  cropped_image = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
339
  dogs.append((cropped_image, confidence, xyxy))
340
 
 
347
  return False
348
 
349
  def calculate_iou(box1, box2):
 
350
  x1 = max(box1[0], box2[0])
351
  y1 = max(box1[1], box2[1])
352
  x2 = min(box1[2], box2[2])
 
472
  # if __name__ == "__main__":
473
  # iface.launch()
474
 
 
475
  async def predict(image):
476
  if image is None:
477
  return "Please upload an image to start.", None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
 
480
  if isinstance(image, np.ndarray):
481
  image = Image.fromarray(image)
482
 
483
+ # 首先使用YOLO檢測是否有多隻狗
484
+ dogs = await detect_multiple_dogs(image)
485
+
486
+ if len(dogs) <= 1:
487
+ # 單狗情境或沒有檢測到狗,使用整張圖片進行預測
488
  return await process_single_dog(image)
 
 
 
 
 
 
489
  else:
490
+ # 多狗情境
491
+ return await process_multiple_dogs(image, dogs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
 
493
  except Exception as e:
494
  error_msg = f"An error occurred: {str(e)}"
 
496
  return error_msg, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
497
 
498
  def has_multiple_dogs(image):
 
 
499
  gray = image.convert('L')
500
  edges = gray.filter(ImageFilter.FIND_EDGES)
501
  edge_pixels = np.array(edges)
502
+ return np.sum(edge_pixels > 128) > image.width * image.height * 0.1
503
 
504
  async def process_single_dog(image):
505
  top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(image)
506
+
 
 
 
 
 
 
 
507
  breed = topk_breeds[0]
508
  description = get_dog_description(breed)
509
 
 
515
  "show_back": False
516
  }
517
  return formatted_description, image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
518
+ elif top1_prob >= 0.2:
519
  explanation = (
520
  f"The model couldn't confidently identify the breed. Here are the top 3 possible breeds:\n\n"
521
  f"1. **{topk_breeds[0]}** ({topk_probs_percent[0]} confidence)\n"
 
534
  "show_back": True
535
  }
536
  return explanation, image, buttons[0], buttons[1], buttons[2], gr.update(visible=True), initial_state
537
+ else:
538
+ initial_state = {
539
+ "explanation": "The image is unclear or the breed is not in the dataset. Please upload a clearer image of a dog.",
540
+ "buttons": [],
541
+ "show_back": False
542
+ }
543
+ return initial_state["explanation"], None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
544
+
545
+ async def process_multiple_dogs(image, dogs):
546
+ color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
547
+ explanations = []
548
+ buttons = []
549
+ annotated_image = image.copy()
550
+ draw = ImageDraw.Draw(annotated_image)
551
+ font = ImageFont.load_default()
552
+
553
+ for i, (cropped_image, _, box) in enumerate(dogs):
554
+ top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
555
+ color = color_list[i % len(color_list)]
556
+ draw.rectangle(box, outline=color, width=3)
557
+ draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)
558
+
559
+ breed = topk_breeds[0]
560
+ if top1_prob >= 0.5:
561
+ description = get_dog_description(breed)
562
+ formatted_description = format_description(description, breed)
563
+ explanations.append(f"Dog {i+1}: {formatted_description}")
564
+ elif top1_prob >= 0.2:
565
+ dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
566
+ dog_explanation += "\n".join([f"{j+1}. **{breed}** ({prob} confidence)" for j, (breed, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))])
567
+ explanations.append(dog_explanation)
568
+ buttons.extend([gr.update(visible=True, value=f"Dog {i+1}: More about {breed}") for breed in topk_breeds[:3]])
569
+ else:
570
+ explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")
571
+
572
+ final_explanation = "\n\n".join(explanations)
573
+ if buttons:
574
+ final_explanation += "\n\nClick on a button to view more information about the breed."
575
+ initial_state = {
576
+ "explanation": final_explanation,
577
+ "buttons": buttons,
578
+ "show_back": True
579
+ }
580
+ return (final_explanation, annotated_image,
581
+ buttons[0] if len(buttons) > 0 else gr.update(visible=False),
582
+ buttons[1] if len(buttons) > 1 else gr.update(visible=False),
583
+ buttons[2] if len(buttons) > 2 else gr.update(visible=False),
584
+ gr.update(visible=True),
585
+ initial_state)
586
+ else:
587
+ initial_state = {
588
+ "explanation": final_explanation,
589
+ "buttons": [],
590
+ "show_back": False
591
+ }
592
+ return final_explanation, annotated_image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
593
 
594
+ def show_details(choice, previous_output):
595
  if not choice:
596
+ return previous_output, gr.update(visible=True)
597
 
598
  try:
599
+ breed = choice.split("More about ")[-1].split(": ")[-1]
600
  description = get_dog_description(breed)
601
+ return format_description(description, breed), gr.update(visible=True)
 
602
  except Exception as e:
603
+ return f"An error occurred while showing details: {e}", gr.update(visible=True)
604
+
 
605
 
606
  # 介面部分
607
  with gr.Blocks() as iface: