Spaces:

lep1
/

braille-recognition-model

Runtime error

App Files Files Community

braille-recognition-model / app.py

lep1

Update app.py

8d29462 verified 28 days ago

raw

history blame

3.99 kB

	"""
	References
	- https://docs.streamlit.io/library/api-reference/layout
	- https://github.com/CodingMantras/yolov8-streamlit-detection-tracking/blob/master/app.py
	- https://huggingface.co/keremberke/yolov8m-valorant-detection/tree/main
	- https://docs.ultralytics.com/usage/python/
	"""
	import time
	import PIL

	import streamlit as st
	import torch
	from ultralyticsplus import YOLO, render_result

	from gtts import gTTS
	import os
	import pygame

	from convert import convert_to_braille_unicode, parse_xywh_and_class


	def load_model(model_path):
	    """Build a YOLO detector from ``model_path`` and return it.

	    ``model_path`` is handed to ``YOLO`` unchanged; any exception raised by
	    the constructor propagates to the caller.
	    """
	    return YOLO(model_path)


	def load_image(image_path):
	    """Open an image with Pillow and return the resulting image object.

	    ``image_path`` is passed straight to ``Image.open``; this script calls it
	    with both a path string and the object returned by the file uploader.
	    Errors raised by Pillow propagate to the caller.
	    """
	    # A bare `import PIL` does not load the `PIL.Image` submodule, so
	    # `PIL.Image.open` only works when some other library happened to import
	    # it first. Import the submodule explicitly to remove that dependency.
	    from PIL import Image

	    return Image.open(image_path)

	# title
	st.title("Braille Pattern Detection")

	# sidebar
	st.sidebar.header("Detection Config")

	# The sliders work in whole percent (10-75, default 15); dividing by 100
	# turns them into the 0-1 fractions stored in the model overrides below.
	conf = float(st.sidebar.slider("Class Confidence", 10, 75, 15)) / 100
	iou = float(st.sidebar.slider("IoU Threshold", 10, 75, 15)) / 100

	model_path = "snoop2head/yolov8m-braille"

	try:
	    model = load_model(model_path)
	    model.overrides["conf"] = conf  # NMS confidence threshold
	    model.overrides["iou"] = iou  # NMS IoU threshold
	    model.overrides["agnostic_nms"] = False  # NMS class-agnostic
	    model.overrides["max_det"] = 1000  # maximum number of detections per image
	except Exception as ex:
	    print(ex)
	    st.error(f"Unable to load model. Check the specified path: {model_path}")
	    # Nothing below can work without a model; halt this script run instead of
	    # continuing with `model` unbound and reporting a misleading image error.
	    st.stop()

	source_img = st.sidebar.file_uploader(
	    "Choose an image...", type=("jpg", "jpeg", "png", "bmp", "webp")
	)
	col1, col2 = st.columns(2)

	# left column of the page body: show the input image (bundled example when
	# nothing has been uploaded) and keep the loaded image for inference.
	with col1:
	    if source_img is None:
	        default_image_path = "./image/test_1.jpg"
	        image = load_image(default_image_path)
	        st.image(
	            default_image_path, caption="Example Input Image", use_column_width=True
	        )
	    else:
	        image = load_image(source_img)
	        st.image(source_img, caption="Uploaded Image", use_column_width=True)

	# right column of the page body
	# `result` collects one line of Braille Unicode per detected row and is read
	# by the text-to-speech step further down, so it must exist even when
	# nothing is detected or inference fails.
	result = ""
	list_boxes = []
	inference_ok = False

	with col2:
	    # Keep the spinner visible for the whole inference, not just the timestamp.
	    with st.spinner("Wait for it..."):
	        start_time = time.time()
	        try:
	            with torch.no_grad():
	                res = model.predict(
	                    image, save=True, save_txt=True, exist_ok=True, conf=conf
	                )
	                boxes = res[0].boxes  # first image
	                # Reverse the channel axis of the plotted array before display.
	                res_plotted = res[0].plot()[:, :, ::-1]

	            list_boxes = parse_xywh_and_class(boxes)

	            st.image(res_plotted, caption="Detected Image", use_column_width=True)
	            IMAGE_DOWNLOAD_PATH = "runs/detect/predict/image0.jpg"
	            inference_ok = True
	        except Exception as ex:
	            # Log the real cause; the on-page message stays generic.
	            print(ex)
	            st.write("Please upload image with types of JPG, JPEG, PNG ...")

	if inference_ok:
	    try:
	        st.success(f"Done! Inference time: {time.time() - start_time:.2f} seconds")
	        st.subheader("Detected Braille Patterns")
	        for box_line in list_boxes:
	            # The last column of each row holds the class index of every box.
	            box_classes = box_line[:, -1]
	            str_left_to_right = "".join(
	                convert_to_braille_unicode(model.names[int(each_class)])
	                for each_class in box_classes
	            )
	            result += str_left_to_right + "\n"
	            st.write(str_left_to_right)
	        # NOTE(review): `result` holds Braille Unicode characters, which the
	        # English TTS voice may not be able to read aloud - confirm intent.
	    except Exception as ex:
	        print(ex)
	        st.write("Please try again with images with types of JPG, JPEG, PNG ...")

	def text_to_speech_gtts(text, lang='en', filename="output.mp3"):
	    """Convert text to speech with gTTS and save it as an audio file.

	    Args:
	        text: Text to synthesise; must contain at least one non-blank character.
	        lang: Language code passed to gTTS (default ``'en'``).
	        filename: Name of the audio file created inside ``./output_audio``.

	    Returns:
	        The path of the saved audio file; with the default ``filename`` this is
	        ``./output_audio/output.mp3``.

	    Raises:
	        ValueError: If ``text`` is empty or only whitespace.
	    """
	    if not text or not text.strip():
	        raise ValueError("text_to_speech_gtts requires non-empty text")

	    # Honour `filename` (previously ignored) and make sure the target
	    # directory exists so saving does not fail on a fresh checkout.
	    audio_path = os.path.join("./output_audio", filename)
	    os.makedirs(os.path.dirname(audio_path), exist_ok=True)

	    tts = gTTS(text=text, lang=lang)
	    tts.save(audio_path)
	    return audio_path

	# Turn the detected text into speech, then offer playback and a download.
	try:
	    if not result.strip():
	        # Nothing was recognised, so there is nothing to synthesise; say so
	        # instead of reporting an audio-processing error.
	        st.info("No Braille text was detected, so no audio was generated.")
	    else:
	        # Generate the speech file
	        audio_file_path = text_to_speech_gtts(result)

	        # Play the audio inside Streamlit
	        st.audio(audio_file_path, format="audio/mp3")

	        # Provide a download button
	        with open(audio_file_path, "rb") as audio_file:
	            st.download_button(
	                label="Download Braille Audio",
	                data=audio_file,
	                file_name="detected_braille.mp3",
	                mime="audio/mp3",
	            )

	except Exception as ex:
	    # Log the underlying cause; the on-page message stays generic.
	    print(ex)
	    st.write("An error occurred while processing the audio.")