# lep1's picture
# Update app.py
# d7c820e verified
# raw
# history blame
# 4.03 kB
"""
Reference
- https://docs.streamlit.io/library/api-reference/layout
- https://github.com/CodingMantras/yolov8-streamlit-detection-tracking/blob/master/app.py
- https://huggingface.co/keremberke/yolov8m-valorant-detection/tree/main
- https://docs.ultralytics.com/usage/python/
"""
import time
import PIL
import streamlit as st
import torch
from ultralyticsplus import YOLO, render_result
from gtts import gTTS
import os
import pygame
from convert import convert_to_braille_unicode, parse_xywh_and_class
def load_model(model_path):
    """build a YOLO model from the given path and return it"""
    return YOLO(model_path)
def load_image(image_path):
    """Open an image with Pillow and return it.

    Args:
        image_path: Whatever ``PIL.Image.open`` accepts. This file passes
            either a path string or the object returned by the sidebar
            file uploader.

    Returns:
        The object returned by ``PIL.Image.open``.
    """
    # A bare ``import PIL`` does not load the ``PIL.Image`` submodule, so
    # ``PIL.Image.open`` only works if some other package imported it first.
    # Import it explicitly instead of relying on that side effect.
    from PIL import Image

    image = Image.open(image_path)
    return image
# title
st.title("Braille Pattern Detection")

# sidebar
st.sidebar.header("Detection Config")
# Sliders are in whole percent (10-75, default 15); dividing by 100 turns
# the selection into a fraction.
conf = float(st.sidebar.slider("Class Confidence", 10, 75, 15)) / 100
iou = float(st.sidebar.slider("IoU Threshold", 10, 75, 15)) / 100

model_path = "snoop2head/yolov8m-braille"

try:
    model = load_model(model_path)
    model.overrides["conf"] = conf  # NMS confidence threshold
    model.overrides["iou"] = iou  # NMS IoU threshold
    model.overrides["agnostic_nms"] = False  # NMS class-agnostic
    model.overrides["max_det"] = 1000  # maximum number of detections per image
except Exception as ex:
    print(ex)
    st.write(f"Unable to load model. Check the specified path: {model_path}")
    # Everything below needs ``model``. Halt this run here rather than
    # continuing with the name undefined and surfacing unrelated
    # "please upload image" messages later on.
    st.stop()

source_img = st.sidebar.file_uploader(
    "Choose an image...", type=("jpg", "jpeg", "png", "bmp", "webp")
)
col1, col2 = st.columns(2)

# left column of the page body: show the input image (upload or bundled example)
with col1:
    if source_img is not None:
        # The user supplied a file through the sidebar uploader.
        image = load_image(source_img)
        st.image(source_img, caption="Uploaded Image", use_column_width=True)
    else:
        # Nothing uploaded yet: fall back to the example image.
        default_image_path = "./image/test_1.jpg"
        image = load_image(default_image_path)
        st.image(
            default_image_path,
            caption="Example Input Image",
            use_column_width=True,
        )
# Single location for the synthesized speech: written by text_to_speech_gtts,
# then played back and offered for download further down.
audio_file_path = "./output_audio/output.mp3"


def text_to_speech_gtts(text, lang='en'):
    """Convert ``text`` to speech with gTTS and save it as an MP3.

    The file is written to ``audio_file_path``.

    Args:
        text: Text to synthesize. The caller below only passes non-blank text.
        lang: Language code forwarded to gTTS.
    """
    # Create the output folder on demand so saving works on a fresh checkout.
    os.makedirs(os.path.dirname(audio_file_path), exist_ok=True)
    # Convert the text to speech.
    tts = gTTS(text=text, lang=lang)
    # Save the audio file.
    tts.save(audio_file_path)


def play_mp3(file_path):
    """Play an MP3 file through pygame's mixer and block until it finishes.

    Args:
        file_path: Path of the MP3 file to play.
    """
    # Initialise pygame's mixer.
    pygame.mixer.init()
    # Load the MP3 file.
    pygame.mixer.music.load(file_path)
    # Start playback.
    pygame.mixer.music.play()
    # Wait for playback to finish; sleep between polls instead of spinning
    # a CPU core in a tight loop.
    while pygame.mixer.music.get_busy():
        time.sleep(0.1)


# State shared by the steps below. Initialising it up front means a failure
# in one step cannot leave a later step reading an undefined name.
list_boxes = None  # stays None when prediction fails
result = ""  # all detected braille lines, one per row, newline-terminated
audio_ready = False  # True only after this run wrote a fresh MP3

# right column of the page body
with col2:
    with st.spinner("Wait for it..."):
        start_time = time.time()
        try:
            with torch.no_grad():
                res = model.predict(
                    image, save=True, save_txt=True, exist_ok=True, conf=conf
                )
                boxes = res[0].boxes  # first image
                # Reverse the last axis of the plotted array
                # (presumably BGR -> RGB for display; confirm against ultralytics).
                res_plotted = res[0].plot()[:, :, ::-1]
                list_boxes = parse_xywh_and_class(boxes)
                st.image(res_plotted, caption="Detected Image", use_column_width=True)
                # Not read anywhere in this file; kept for compatibility.
                IMAGE_DOWNLOAD_PATH = "runs/detect/predict/image0.jpg"
        except Exception as ex:
            print(ex)
            st.write("Please upload image with types of JPG, JPEG, PNG ...")

# Render the detected braille, one st.write per row of boxes.
if list_boxes is not None:
    try:
        st.success(f"Done! Inference time: {time.time() - start_time:.2f} seconds")
        st.subheader("Detected Braille Patterns")
        for box_line in list_boxes:
            # The last column of each row holds the class index.
            box_classes = box_line[:, -1]
            str_left_to_right = "".join(
                convert_to_braille_unicode(model.names[int(each_class)])
                for each_class in box_classes
            )
            result += str_left_to_right + "\n"
            st.write(str_left_to_right)
    except Exception as ex:
        print(ex)
        st.write("Please try again with images with types of JPG, JPEG, PNG ...")

# Synthesize speech only when something was detected, so gTTS is never
# called with blank text.
if result.strip():
    try:
        text_to_speech_gtts(result)
        audio_ready = True
    except Exception as ex:
        print(ex)
        st.write("Unable to generate audio for the detected braille.")

if audio_ready:
    # Playback is best-effort: pygame reports mixer failures with
    # pygame.error (presumably e.g. when the host has no audio device),
    # and the download button below must still be offered in that case.
    try:
        # Call the function to play the MP3 file.
        play_mp3(audio_file_path)
    except pygame.error as ex:
        print(ex)

    with open(audio_file_path, "rb") as fl:
        st.download_button(
            "Download Braille Audio",
            data=fl,
            file_name="detected_braille.mp3",
            mime="audio/mp3",
        )