Spaces:

tosanoob
/

mediapipe_fastapi_20classes

Paused

App Files Files Community

mediapipe_fastapi_20classes / mediapipe_preprocess.py

tosanoob

First upload

aa25f09 verified 6 months ago

raw

history blame contribute delete

4.18 kB

	import mediapipe as mp
	import numpy as np
	import cv2
	import copy

	# Shortcuts to the MediaPipe solution modules. `mp_drawing` is not referenced
	# by any function visible in this file.
	mp_holistic = mp.solutions.holistic
	mp_drawing = mp.solutions.drawing_utils
	# Frame size in pixels; normalize_keypoint multiplies landmark x/y by these
	# values to obtain pixel coordinates.
	width, height = 640, 480

	# Single Holistic model created at import time and shared by every call to
	# mediapipe_detection.
	model = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

	def mediapipe_detection(image):
	    """Run the shared Holistic model on one image and return its raw result.

	    The result object is handed back untouched, in the model's default format.
	    NOTE(review): the colour order expected by `model.process` is not checked
	    here -- confirm that callers pass frames in the order the model wants.
	    """
	    return model.process(image)

	def _hand_rows(current, previous):
	    """Return 21 [x, y, z] rows for one hand, falling back to `previous`, then zeros."""
	    source = current if current else previous
	    if source:
	        return [np.array([p.x, p.y, p.z]) for p in source.landmark]
	    return [np.zeros(3) for _ in range(21)]


	def extract_keypoint(results, last):
	    """Collect pose and hand landmarks of one frame as a list of numpy rows.

	    Layout of the returned list (75 rows when every landmark list has its
	    usual length):
	      * rows 0-32:  pose landmarks as [x, y, z, visibility]
	      * rows 33-53: left-hand landmarks as [x, y, z]
	      * rows 54-74: right-hand landmarks as [x, y, z]

	    Args:
	        results: detection result for the current frame; must expose
	            `pose_landmarks`, `left_hand_landmarks` and
	            `right_hand_landmarks`, each either falsy or an object with a
	            `.landmark` iterable.
	        last: result of the previous frame, or None. When a hand is missing
	            in `results`, the hand stored in `last` is reused instead.

	    Returns:
	        list of numpy arrays. Missing parts are filled with float zero rows
	        (33 rows for the pose, 21 per hand) so the layout stays fixed.
	    """
	    pose = results.pose_landmarks
	    if pose:
	        res = [np.array([p.x, p.y, p.z, p.visibility]) for p in pose.landmark]
	    else:
	        res = [np.zeros(4) for _ in range(33)]

	    # Same fallback rule for both hands: current frame, else previous frame,
	    # else zeros. Left hand must come before right hand in the output.
	    for side in ("left_hand_landmarks", "right_hand_landmarks"):
	        previous = getattr(last, side) if last is not None else None
	        res.extend(_hand_rows(getattr(results, side), previous))
	    return res

	def normalize_keypoint(res, img=None, frame_size=None):
	    """Re-centre and re-scale keypoints around the midpoint of rows 11 and 12.

	    Rows 11 and 12 of `res` are used as the reference pair (these are the two
	    shoulders in MediaPipe's pose numbering -- confirm against the model in
	    use). Every keypoint is translated so that the midpoint of the pair lands
	    on (0.5, 0.5) and then scaled about (0.5, 0.5) by
	    ``(200 * frame_width / 640) / pixel_distance_of_the_pair``, i.e. the pair
	    ends up roughly 200 px apart on a 640 px wide frame.

	    Args:
	        res: sequence of numpy rows whose first two entries are x and y.
	            The rows are modified in place.
	        img: optional image; when given, the reference pair is marked on it
	            with two filled circles (debug aid, best effort).
	        frame_size: optional ``(width, height)`` in pixels. Defaults to the
	            module-level `width` and `height`.

	    Returns:
	        The same `res` object. Rows whose x and y are both exactly 0 are
	        treated as "missing" and left untouched. If the reference pair
	        coincides (for instance no pose was detected and both rows are zero)
	        no scale can be derived, so `res` is returned unchanged instead of
	        being filled with inf/NaN.
	    """
	    frame_w, frame_h = (width, height) if frame_size is None else frame_size

	    # Pixel coordinates of the reference pair.
	    x1, y1 = res[11][0] * frame_w, res[11][1] * frame_h
	    x2, y2 = res[12][0] * frame_w, res[12][1] * frame_h

	    if img is not None:
	        try:
	            cv2.circle(img, (int(x1), int(y1)), 4, (0, 255, 255), -1)
	            cv2.circle(img, (int(x2), int(y2)), 4, (0, 255, 255), -1)
	        except (cv2.error, TypeError, ValueError, OverflowError):
	            # Drawing is only a debugging aid; never fail normalization on it.
	            pass

	    dis = np.hypot(x1 - x2, y1 - y2)
	    if not np.isfinite(dis) or dis == 0:
	        # Degenerate reference pair: dividing by `dis` would produce inf/NaN.
	        return res

	    # Read the centre before the loop: rows 11 and 12 are rewritten inside it.
	    shift_x = 0.5 - (res[11][0] + res[12][0]) / 2
	    shift_y = 0.5 - (res[11][1] + res[12][1]) / 2
	    scale = (200 * frame_w / 640) / dis

	    for row in res:
	        if row[0] == 0 and row[1] == 0:
	            continue  # placeholder for a missing landmark
	        row[0] = 0.5 + (row[0] + shift_x - 0.5) * scale
	        row[1] = 0.5 + (row[1] + shift_y - 0.5) * scale
	    return res

	def update_mpresult(res, results, last):
	    """Write the x/y values of `res` back into the landmarks held by `results`.

	    `res` uses the layout produced by extract_keypoint: 33 pose rows, then
	    21 left-hand rows, then 21 right-hand rows.

	    Args:
	        res: sequence of rows; entries 0 and 1 of each row are the new x, y.
	        results: detection result of the current frame; modified in place.
	        last: detection result of the previous frame, or None. May be
	            modified in place (see below).

	    Behaviour:
	      * Pose landmarks get their x/y overwritten. If pose landmark 20 (or 19)
	        ends up with y > 1.1, the right (or left) hand stored in `last` is
	        cleared so it is no longer carried forward. Indices 19/20 are
	        presumably the left/right index-finger pose points -- confirm against
	        MediaPipe's pose numbering.
	      * A hand present in `results` gets its x/y overwritten; a missing hand
	        is replaced by a deep copy of the one in `last`, if any.

	    Returns:
	        The same `results` object.
	    """
	    if results.pose_landmarks:
	        for idx, point in enumerate(results.pose_landmarks.landmark):
	            point.x = res[idx][0]
	            point.y = res[idx][1]
	            if idx in (19, 20) and point.y > 1.1 and last:
	                if idx == 20:
	                    last.right_hand_landmarks = None
	                else:
	                    last.left_hand_landmarks = None

	    # Hand rows start right after the 33 pose rows, 21 rows per hand. The pose
	    # loop above must run first: it may clear a hand in `last` that would
	    # otherwise be copied here.
	    offset = 33
	    for side in ("left_hand_landmarks", "right_hand_landmarks"):
	        hand = getattr(results, side)
	        if hand:
	            for idx, point in enumerate(hand.landmark, start=offset):
	                point.x = res[idx][0]
	                point.y = res[idx][1]
	        elif last is not None and getattr(last, side):
	            setattr(results, side, copy.deepcopy(getattr(last, side)))
	        offset += 21
	    return results

	def extract_keypoints_flatten(result, last, img=None):
	    """Official per-frame entry point: extract, normalize and flatten keypoints.

	    The keypoint rows are extracted from `result` (with `last` as fallback),
	    normalized (optionally drawing on `img`), written back into `result`, and
	    finally joined into a single 1-D numpy array.
	    """
	    keypoints = normalize_keypoint(extract_keypoint(result, last), img)
	    # Called for its in-place effect on `result` (and possibly `last`).
	    update_mpresult(keypoints, result, last)
	    return np.concatenate(keypoints)
	def mediapipe_process(frames):
	    """Main function to call: turn a batch of frames into a numpy array for prediction.

	    Each frame is run through the detector and flattened into one keypoint
	    vector; the vectors are stacked in frame order. The previous frame's
	    result is kept so that missing hands can be filled in from it.

	    NOTE(review): `previous` is copied after extract_keypoints_flatten has
	    processed the result. That call writes normalized coordinates back into
	    the landmarks, so a hand carried into the next frame is already
	    normalized and gets normalized a second time -- confirm this is intended.
	    """
	    previous = None
	    batch = []
	    for image in frames:
	        detected = mediapipe_detection(image)
	        batch.append(extract_keypoints_flatten(detected, previous))
	        previous = copy.deepcopy(detected)
	    return np.array(batch)