Commit 1345bfc · Parent: 38804e8
Update inference.py

inference.py CHANGED (+124 −110)
@@ -7,7 +7,7 @@ from glob import glob
import torch, face_detection
from models import Wav2Lip
import platform
-
+import cv2

parser = argparse.ArgumentParser(description='Inference code to lip-sync videos in the wild using Wav2Lip models')

@@ -67,116 +67,130 @@ def get_smoothened_boxes(boxes, T):
    return boxes

def face_detect(images):
+    detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
+                                            flip_input=False, device=device)
+
+    batch_size = args.face_det_batch_size
+
+    last_face = None  # holds the most recently detected face crop
+
+    while 1:
+        predictions = []
+        try:
+            for i in tqdm(range(0, len(images), batch_size)):
+                predictions.extend(detector.get_detections_for_batch(np.array(images[i:i + batch_size])))
+        except RuntimeError:
+            if batch_size == 1:
+                raise RuntimeError('Image too big to run face detection on GPU. Please use the --resize_factor argument')
+            batch_size //= 2
+            print('Recovering from OOM error; New batch size: {}'.format(batch_size))
+            continue
+        break
+
+    head_exist = []
+    results = []
+    pady1, pady2, padx1, padx2 = args.pads
+
+    # remember the first frame that actually contains a face, as a fallback box
+    first_head_rect = None
+    first_head_image = None
+    for rect, image in zip(predictions, images):
+        if rect is not None:
+            first_head_rect = rect
+            first_head_image = image
+            break
+
+    for rect, image in zip(predictions, images):
+        if rect is None:
+            head_exist.append(False)
+            if len(results) == 0:
+                # no box yet: fall back to the first detected face's box
+                y1 = max(0, first_head_rect[1] - pady1)
+                y2 = min(first_head_image.shape[0], first_head_rect[3] + pady2)
+                x1 = max(0, first_head_rect[0] - padx1)
+                x2 = min(first_head_image.shape[1], first_head_rect[2] + padx2)
+                results.append([x1, y1, x2, y2])
+            else:
+                # otherwise repeat the previous frame's box
+                results.append(results[-1])
+        else:
+            head_exist.append(True)
+            y1 = max(0, rect[1] - pady1)
+            y2 = min(image.shape[0], rect[3] + pady2)
+            x1 = max(0, rect[0] - padx1)
+            x2 = min(image.shape[1], rect[2] + padx2)
+            results.append([x1, y1, x2, y2])
+            # persist the latest face crop so datagen() can reload it later
+            last_face = image[y1:y2, x1:x2]
+            cv2.imwrite("last_face.jpg", last_face)
+
+    boxes = np.array(results)
+    if not args.nosmooth: boxes = get_smoothened_boxes(boxes, T=5)
+    results = [[image[y1:y2, x1:x2], (y1, y2, x1, x2)] for image, (x1, y1, x2, y2) in zip(images, boxes)]
+
+    del detector
+    return results, head_exist
+

def datagen(frames, mels):
+    img_batch, head_exist_batch, mel_batch, frame_batch, coords_batch = [], [], [], [], []
+
+    # 1. Find the face box for every frame; frames with no detected face come back as None
+    if args.box[0] == -1:
+        if not args.static:
+            face_det_results, head_exist = face_detect(frames)  # BGR2RGB for CNN face detection
+        else:
+            face_det_results, head_exist = face_detect([frames[0]])
+    else:
+        print('Using the specified bounding box instead of face detection...')
+        y1, y2, x1, x2 = args.box
+        face_det_results = [[f[y1:y2, x1:x2], (y1, y2, x1, x2)] for f in frames]
+        head_exist = [True] * len(frames)
+
+    for i, m in enumerate(mels):
+        # frame index corresponding to this mel chunk
+        idx = 0 if args.static else i % len(frames)
+        # frame corresponding to this mel chunk
+        frame_to_save = frames[idx].copy()
+        # face crop and coordinates for that frame
+        face, coords = face_det_results[idx].copy()
+
+        face = cv2.resize(face, (args.img_size, args.img_size))
+        head_exist_batch.append(head_exist[idx])
+        img_batch.append(face)
+        mel_batch.append(m)
+        frame_batch.append(frame_to_save)
+        coords_batch.append(coords)
+
+        if len(img_batch) >= args.wav2lip_batch_size:
+            img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
+
+            img_masked = img_batch.copy()
+            img_masked[:, args.img_size//2:] = 0
+
+            img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.
+            mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
+
+            yield img_batch, head_exist_batch, mel_batch, frame_batch, coords_batch
+            img_batch, head_exist_batch, mel_batch, frame_batch, coords_batch = [], [], [], [], []
+
+    # reload the face crop that face_detect() saved to disk and append it to
+    # the final (possibly partial) batch
+    last_face = cv2.imread("last_face.jpg")
+    last_face = cv2.resize(last_face, (args.img_size, args.img_size))
+    img_batch.append(last_face)
+    mel_batch.append(mels[-1])
+    frame_batch.append(frames[-1])
+    coords_batch.append(face_det_results[-1][1])
+    head_exist_batch.append(head_exist[-1])
+
+    if len(img_batch) > 0:
+        img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
+
+        img_masked = img_batch.copy()
+        img_masked[:, args.img_size//2:] = 0
+
+        img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.
+        mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
+
+        yield img_batch, head_exist_batch, mel_batch, frame_batch, coords_batch

mel_step_size = 16
device = 'cuda' if torch.cuda.is_available() else 'cpu'

@@ -313,4 +327,4 @@ def main():
    subprocess.call(command, shell=platform.system() != 'Windows')

if __name__ == '__main__':
-    main()
+    main()
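The diff changes face_detect() to return a (results, head_exist) pair and datagen() to yield five values instead of four, but the matching changes to main() fall outside the hunks shown above. Below is a minimal sketch of how the consumer loop in main() might use the extra head_exist_batch flags, modelled on the upstream Wav2Lip main(); the model, out, full_frames, mel_chunks, and device names come from upstream, and skipping the paste-back for frames without a detected face is an assumption about the commit's intent, not code from this commit.

    # Hypothetical consumer loop for the modified datagen(); treat as a sketch.
    gen = datagen(full_frames.copy(), mel_chunks)
    for img_batch, head_exist_batch, mel_batch, frame_batch, coords_batch in gen:
        img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device)
        mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device)

        with torch.no_grad():
            pred = model(mel_batch, img_batch)
        pred = pred.cpu().numpy().transpose(0, 2, 3, 1) * 255.

        for p, has_head, f, c in zip(pred, head_exist_batch, frame_batch, coords_batch):
            if has_head:
                # paste the generated mouth crop back only where a face was detected
                y1, y2, x1, x2 = c
                f[y1:y2, x1:x2] = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
            out.write(f)  # frames without a face pass through unmodified

One design note: last_face is handed from face_detect() to datagen() through last_face.jpg on disk. Since both functions run in the same process, the crop could equally be returned in memory alongside head_exist, which would avoid leaving a stray file next to the inference script.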