sindhuhegde committed on
Commit
850b849
·
1 Parent(s): 5c45973

Update app

Browse files
Files changed (2) hide show
  1. app.py +7 -1
  2. preprocess/inference_preprocess.py +1 -1
app.py CHANGED
@@ -1028,18 +1028,21 @@ def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
1028
  orig_frames, status = load_video_frames(test_video_25fps)
1029
  if status != "success":
1030
  return None, status
 
1031
 
1032
  # Extract and save the audio file
1033
  orig_wav_file, status = extract_audio(video_path, result_folder)
1034
  if status != "success":
1035
  return None, status
 
1036
 
1037
  # Pre-process and extract per-speaker tracks in each scene
1038
  print("Pre-processing the input video...")
1039
  status = subprocess.call("python preprocess/inference_preprocess.py --data_dir={}/temp --sd_root={}/crops --work_root={}/metadata --data_root={}".format(result_folder_input, result_folder_input, result_folder_input, video_path), shell=True)
1040
  if status != 0:
1041
  return None, "Error in pre-processing the input video, please check the input video and try again..."
1042
-
 
1043
  # Load the tracks file saved during pre-processing
1044
  with open('{}/metadata/tracks.pckl'.format(result_folder_input), 'rb') as file:
1045
  tracks = pickle.load(file)
@@ -1053,6 +1056,7 @@ def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
1053
  track_dict[scene_num][i] = {}
1054
  for frame_num, bbox in zip(tracks[scene_num][i]['track']['frame'], tracks[scene_num][i]['track']['bbox']):
1055
  track_dict[scene_num][i][frame_num] = bbox
 
1056
 
1057
  # Get the total number of scenes
1058
  test_scenes = os.listdir("{}/crops".format(result_folder_input))
@@ -1061,6 +1065,7 @@ def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
1061
  # Load the trained model
1062
  model = Transformer_RGB()
1063
  model = load_checkpoint(CHECKPOINT_PATH, model)
 
1064
 
1065
  # Compute the active speaker in each scene
1066
  output_tracks = {}
@@ -1149,6 +1154,7 @@ def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
1149
  output_tracks[frame] = track_dict[scene_num][label][frame]
1150
 
1151
  # Save the output video
 
1152
  video_output, status = save_video(output_tracks, orig_frames.copy(), orig_wav_file, result_folder_output)
1153
  if status != "success":
1154
  return None, status
 
1028
  orig_frames, status = load_video_frames(test_video_25fps)
1029
  if status != "success":
1030
  return None, status
1031
+ print("Successfully loaded the frames")
1032
 
1033
  # Extract and save the audio file
1034
  orig_wav_file, status = extract_audio(video_path, result_folder)
1035
  if status != "success":
1036
  return None, status
1037
+ print("Successfully loaded the spectrograms")
1038
 
1039
  # Pre-process and extract per-speaker tracks in each scene
1040
  print("Pre-processing the input video...")
1041
  status = subprocess.call("python preprocess/inference_preprocess.py --data_dir={}/temp --sd_root={}/crops --work_root={}/metadata --data_root={}".format(result_folder_input, result_folder_input, result_folder_input, video_path), shell=True)
1042
  if status != 0:
1043
  return None, "Error in pre-processing the input video, please check the input video and try again..."
1044
+ print("Successfully preprocessed the video")
1045
+
1046
  # Load the tracks file saved during pre-processing
1047
  with open('{}/metadata/tracks.pckl'.format(result_folder_input), 'rb') as file:
1048
  tracks = pickle.load(file)
 
1056
  track_dict[scene_num][i] = {}
1057
  for frame_num, bbox in zip(tracks[scene_num][i]['track']['frame'], tracks[scene_num][i]['track']['bbox']):
1058
  track_dict[scene_num][i][frame_num] = bbox
1059
+ print("Successfully loaded the extracted person-tracks")
1060
 
1061
  # Get the total number of scenes
1062
  test_scenes = os.listdir("{}/crops".format(result_folder_input))
 
1065
  # Load the trained model
1066
  model = Transformer_RGB()
1067
  model = load_checkpoint(CHECKPOINT_PATH, model)
1068
+ print("Successfully loaded the model")
1069
 
1070
  # Compute the active speaker in each scene
1071
  output_tracks = {}
 
1154
  output_tracks[frame] = track_dict[scene_num][label][frame]
1155
 
1156
  # Save the output video
1157
+ print("Generating active-speaker detection output video...")
1158
  video_output, status = save_video(output_tracks, orig_frames.copy(), orig_wav_file, result_folder_output)
1159
  if status != "success":
1160
  return None, status
preprocess/inference_preprocess.py CHANGED
@@ -197,7 +197,7 @@ def inference_video(opt, padding=0):
197
  fidx += 1
198
  yield
199
 
200
- for _ in tqdm(generator()):
201
  pass
202
 
203
 
 
197
  fidx += 1
198
  yield
199
 
200
+ for _ in tqdm(generate_detections()):
201
  pass
202
 
203