k4d3 committed on
Commit
125b361
1 Parent(s): 1b6e1a9

audiogen, png2mp4

Browse files

Signed-off-by: Balazs Horvath <acsipont@gmail.com>

Files changed (2) hide show
  1. audiogen_medium.py +18 -0
  2. zsh/png2mp4.zsh +30 -48
audiogen_medium.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Generate short audio clips from text prompts with AudioGen.

Usage:
    python audiogen_medium.py "dog barking" "sirens in the distance"

One audio file per prompt is written to the current directory, named after
the prompt.
"""
import sys

MODEL_NAME = 'facebook/audiogen-medium'
DURATION_SECONDS = 5


def prompt_to_file_stem(description):
    """Return a file stem derived from *description*.

    Spaces become underscores. Path separators are replaced as well, so a
    prompt such as ``"rain/thunder"`` cannot point into another directory.
    """
    stem = description.replace(' ', '_')
    for separator in ('/', '\\'):
        stem = stem.replace(separator, '_')
    return stem


def main(argv=None):
    """Generate one clip per prompt and return a process exit status.

    Args:
        argv: Prompts to render. Defaults to ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when no prompt was given.
    """
    descriptions = sys.argv[1:] if argv is None else list(argv)
    if not descriptions:
        # Validate before loading the model so a usage error fails fast,
        # and report it on stderr like any other diagnostic.
        print('At least one prompt should be provided', file=sys.stderr)
        return 1

    # Heavy ML imports are deferred until the arguments are known to be
    # valid; importing them is slow and only needed for real generation.
    import torchaudio  # noqa: F401  (imported by the original script)
    from audiocraft.models import AudioGen
    from audiocraft.data.audio import audio_write

    model = AudioGen.get_pretrained(MODEL_NAME)
    model.set_generation_params(duration=DURATION_SECONDS)
    wav = model.generate(descriptions)  # one waveform per prompt

    for one_wav, description in zip(wav, descriptions):
        # Saved under the prompt-derived stem with loudness normalization
        # (-14 dB LUFS according to the AudioCraft example this is based on).
        audio_write(
            prompt_to_file_stem(description),
            one_wav.cpu(),
            model.sample_rate,
            strategy="loudness",
            loudness_compressor=True,
        )
    return 0


if __name__ == '__main__':
    sys.exit(main())
zsh/png2mp4.zsh CHANGED
@@ -10,81 +10,63 @@
10
  #
11
  # This function:
12
  # 1. Deactivates conda environment
13
- # 2. Creates a temporary directory for processing
14
- # 3. Finds all PNG files in the current directory
15
- # 4. Uses the current directory name as the output filename prefix
16
- # 5. Copies and optionally repeats images to the temp directory
17
- # 6. Uses ffmpeg to create an MP4 with x265 encoding, including:
18
  # - Frame rate of 60 fps
19
  # - Image scaling to 1024x1024
20
  # - Step count overlay text (divided by 1,000,000 and truncated to remove 6 zeros and decimal places)
21
  # - High-quality encoding settings
22
- # 7. Adds padding and fade-out effect to the final video
23
- # 8. Cleans up temporary files
24
- # 9. Reactivates conda environment
25
  #
26
  # Requirements:
27
  # - ffmpeg with libx265 support
28
- # - bc (basic calculator)
29
  # - Inconsolata-Light.ttf font in /usr/share/fonts/TTF/
30
  png2mp4_x265() {
31
  conda deactivate
32
- local max_images=""
33
  local step_multiplier=1
34
  local repeat=1
35
- local temp_dir="/home/kade/.local/tmp"
36
  while [[ "$#" -gt 0 ]]; do
37
  case $1 in
38
- --max) max_images="$2"; shift ;;
39
  --step) step_multiplier="$2"; shift ;;
40
  --repeat) repeat="$2"; shift ;;
41
  *) echo "Unknown parameter passed: $1"; return 1 ;;
42
  esac
43
  shift
44
  done
45
- echo "Creating temporary directory..."
46
- mkdir -p "$temp_dir"
47
- echo "Checking for PNG files..."
48
- png_files=($(/usr/bin/env ls *.png 2>/dev/null))
49
- if [ ${#png_files[@]} -eq 0 ]; then
50
- echo "Error: No PNG files found in the current directory."
51
- return 1
52
- fi
53
- echo "Setting output filename..."
54
  output_filename="$(basename "$(pwd)")_x265.mp4"
55
  echo "Output filename: $output_filename"
56
- echo "Creating repeated images..."
57
- for img in "${png_files[@]}"; do
58
- for i in $(seq 1 $repeat); do
59
- cp "$img" "$temp_dir/${img%.*}_${i}.png"
60
- done
61
- done
 
 
62
  echo "Running ffmpeg with x265 encoding..."
63
- if [[ -n "$max_images" ]]; then
64
- dis_is_broken_dedupe_me
65
- ffmpeg -framerate 60 -pattern_type glob -i "$temp_dir/*.png" -vf "scale=1024x1024,select='not(mod(n\,$max_images))',drawtext=fontfile=/usr/share/fonts/TTF/Inconsolata-Light.ttf:text='Steps\: %{eif\:trunc(n*$step_multiplier/$repeat)}':x=10:y=h-th-10:fontsize=24:fontcolor=white" \
66
- -c:v libx265 -preset slower -x265-params "crf=18:qcomp=0.8:aq-mode=3:aq-strength=0.8:deblock=-1,-1" -pix_fmt yuv420p -y "$temp_dir/temp.mp4"
67
- else
68
- ffmpeg -framerate "60/$repeat" -pattern_type glob -i "*.png" -vf "scale=1024x1024,drawtext=fontfile=/usr/share/fonts/TTF/Inconsolata-Light.ttf:text='Steps\: %{eif\\:trunc(n*$step_multiplier/$repeat)\\:u\\:3}':x=10:y=h-th-10:fontsize=24:fontcolor=white" \
69
- -c:v libx265 -preset slower -x265-params "crf=18:qcomp=0.8:aq-mode=3:aq-strength=0.8:deblock=-1,-1" -pix_fmt yuv420p -y "$temp_dir/temp.mp4"
70
- fi
 
 
 
 
 
 
 
71
  if [ $? -ne 0 ]; then
72
  echo "Error: ffmpeg command failed."
73
- rm -rf "$temp_dir"
74
- return 1
75
- fi
76
- echo "Processing final video..."
77
- duration=$(ffmpeg -i "$temp_dir/temp.mp4" 2>&1 | grep 'Duration' | awk '{print $2}' | tr -d , | awk -F: '{print ($1 * 3600) + ($2 * 60) + $3}')
78
- fade_start=$(echo "$duration + 3" | bc)
79
- ffmpeg -i "$temp_dir/temp.mp4" -vf "tpad=stop_mode=clone:stop_duration=8,fade=t=out:st=$fade_start:d=5" \
80
- -c:v libx265 -preset slower -x265-params "crf=18:qcomp=0.8:aq-mode=3:aq-strength=0.8:deblock=-1,-1" -pix_fmt yuv420p -y "$output_filename"
81
- if [ $? -ne 0 ]; then
82
- echo "Error: Final ffmpeg processing failed."
83
- rm -rf "$temp_dir"
84
  return 1
85
  fi
86
- echo "Cleaning up temporary files..."
87
- rm -rf "$temp_dir"
88
  conda activate
89
  echo "Process completed successfully."
90
  }
 
10
  #
11
  # This function:
12
  # 1. Deactivates conda environment
13
+ # 2. Finds all PNG files in the current directory
14
+ # 3. Uses the current directory name as the output filename prefix
15
+ # 4. Uses ffmpeg to create an MP4 with x265 encoding, including:
 
 
16
  # - Frame rate of 60 fps
17
  # - Image scaling to 1024x1024
18
  # - Step count overlay text (frame index multiplied by the --step value, shown as an integer)
19
  # - High-quality encoding settings
20
+ # 5. Adds padding and fade-out effect to the final video
21
+ # 6. Reactivates conda environment
 
22
  #
23
  # Requirements:
24
  # - ffmpeg with libx265 support
 
25
  # - Inconsolata-Light.ttf font in /usr/share/fonts/TTF/
26
  png2mp4_x265() {
27
  conda deactivate
 
28
  local step_multiplier=1
29
  local repeat=1
30
+
31
  while [[ "$#" -gt 0 ]]; do
32
  case $1 in
 
33
  --step) step_multiplier="$2"; shift ;;
34
  --repeat) repeat="$2"; shift ;;
35
  *) echo "Unknown parameter passed: $1"; return 1 ;;
36
  esac
37
  shift
38
  done
 
 
 
 
 
 
 
 
 
39
  output_filename="$(basename "$(pwd)")_x265.mp4"
40
  echo "Output filename: $output_filename"
41
+
42
+
43
+ local nframes=$(find . -type f -name '*.png' | wc -l)
44
+ local duration=$(($nframes * $repeat / 60.))
45
+ local fade_start=$((duration + 3))
46
+ echo "Found $nframes for a duration of $duration seconds"
47
+
48
+
49
  echo "Running ffmpeg with x265 encoding..."
50
+ local font=/usr/share/fonts/TTF/Inconsolata-Light.ttf
51
+ local drawtext="drawtext=fontfile=${font}:text='Steps\: %{eif\\:trunc(n*$step_multiplier)\\:u\\:3}':x=10:y=h-th-10:fontsize=24:fontcolor=white"
52
+ local fadeout="tpad=stop_mode=clone:stop_duration=8,fade=t=out:st=${fade_start}:d=5"
53
+ local output_preset=(
54
+ -c:v libx265
55
+ -preset slower -tune animation
56
+ -g $repeat
57
+ -pix_fmt yuv420p
58
+ -x265-params "crf=20:qcomp=0.8:aq-mode=3:aq-strength=0.8:deblock=-1,-1"
59
+ )
60
+
61
+ ffmpeg -framerate "60/$repeat" -pattern_type glob -i "*.png" -r 60 -vf "scale=1024x1024,${drawtext},${fadeout}"\
62
+ -c:v ffv1 -f matroska - | ffmpeg -f matroska -i - \
63
+ -vf "$fadeout" "${output_preset[@]}" -y output.mp4
64
+
65
  if [ $? -ne 0 ]; then
66
  echo "Error: ffmpeg command failed."
 
 
 
 
 
 
 
 
 
 
 
67
  return 1
68
  fi
69
+
 
70
  conda activate
71
  echo "Process completed successfully."
72
  }