Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, random, json
|
2 |
+
import numpy as np
|
3 |
+
from pydub import AudioSegment
|
4 |
+
from pydub.utils import make_chunks
|
5 |
+
from pydub.effects import compress_dynamic_range
|
6 |
+
from PIL import Image
|
7 |
+
import cv2
|
8 |
+
from moviepy.editor import VideoClip, AudioFileClip
|
9 |
+
import gradio as gr
|
10 |
+
|
11 |
+
# Load configuration
|
12 |
+
def load_config(config_path):
    """Load and parse a JSON configuration file.

    Parameters:
        config_path: path to a JSON file (frame paths, timing, thresholds, ...).

    Returns:
        The parsed configuration as a dict.

    Raises:
        FileNotFoundError: if config_path does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Explicit UTF-8 so parsing does not depend on the platform's
    # default locale encoding (JSON is UTF-8 by specification).
    with open(config_path, 'r', encoding='utf-8') as config_file:
        return json.load(config_file)
|
15 |
+
|
16 |
+
def process_audio_and_generate_video(config_path, audio_file):
    """Generate a simple lip-sync "talking head" video from an audio file.

    Loads four character frames (closed/open mouth, each with a blinking
    variant) from paths in the JSON config, flattens them onto a solid
    background colour, then walks the audio in frame-sized chunks: when a
    chunk's loudness exceeds a decibel threshold the open-mouth frame is
    shown, otherwise the closed-mouth one. Random blinks swap in the
    blinking variants. The frames plus the original audio are written to
    an MP4 file.

    Parameters:
        config_path: path to the JSON configuration file.
        audio_file: path to the input audio file.

    Returns:
        Path of the written video file.
    """
    config = load_config(config_path)

    # Load the four character frames. Force RGBA: Image.alpha_composite
    # raises ValueError if either operand is not RGBA (e.g. RGB/palette PNGs).
    closed_mouth_img = Image.open(config['frame_paths']['closed_mouth']).convert('RGBA')
    open_mouth_img = Image.open(config['frame_paths']['open_mouth']).convert('RGBA')
    closed_mouth_blinking_img = Image.open(config['frame_paths']['closed_mouth_blinking']).convert('RGBA')
    open_mouth_blinking_img = Image.open(config['frame_paths']['open_mouth_blinking']).convert('RGBA')

    # Solid background in the configured colour, sized to the frames.
    background_color = tuple(config['background_color'])
    background = Image.new('RGBA', closed_mouth_img.size, background_color)

    # Flatten each frame onto the background.
    closed_mouth_img = Image.alpha_composite(background, closed_mouth_img)
    open_mouth_img = Image.alpha_composite(background, open_mouth_img)
    closed_mouth_blinking_img = Image.alpha_composite(background, closed_mouth_blinking_img)
    open_mouth_blinking_img = Image.alpha_composite(background, open_mouth_blinking_img)

    # Convert to plain RGB numpy arrays, the frame format moviepy consumes.
    closed_mouth_cv = cv2.cvtColor(np.array(closed_mouth_img), cv2.COLOR_RGBA2RGB)
    open_mouth_cv = cv2.cvtColor(np.array(open_mouth_img), cv2.COLOR_RGBA2RGB)
    closed_mouth_blinking_cv = cv2.cvtColor(np.array(closed_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
    open_mouth_blinking_cv = cv2.cvtColor(np.array(open_mouth_blinking_img), cv2.COLOR_RGBA2RGB)

    # Timing parameters.
    frame_rate = config['frame_rate']
    # NOTE(review): one video frame lasts 1000/frame_rate ms, so this only
    # lines up if config['frame_duration_ms'] is ~1000 — confirm in config.json.
    frame_duration_ms = config['frame_duration_ms'] // frame_rate

    # Load the audio, then compress and normalize its loudness so a single
    # decibel threshold behaves consistently across quiet and loud recordings.
    audio = AudioSegment.from_file(audio_file)
    compressed_audio = compress_dynamic_range(audio, threshold=-20.0, ratio=8.0, attack=1.0, release=10.0)
    target_dBFS = -10.0
    change_in_dBFS = target_dBFS - compressed_audio.dBFS
    normalized_audio = compressed_audio.apply_gain(change_in_dBFS)

    # One audio chunk per video frame.
    audio_chunks = make_chunks(normalized_audio, frame_duration_ms)

    def calculate_decibels(chunk):
        # Loudness of a chunk in dBFS (-inf for digital silence).
        return chunk.dBFS

    # Mouth-open threshold: derived from this clip's average loudness when
    # dynamic_threshold is enabled, otherwise a fixed value from the config.
    if config["dynamic_threshold"] == 1:
        average_dBFS = sum(chunk.dBFS for chunk in audio_chunks) / len(audio_chunks)
        decibel_threshold = average_dBFS + 4  # Set threshold above average
    else:
        decibel_threshold = config['decibel_threshold']

    # Blink state, shared across make_frame calls.
    blink_duration = config['blink_duration']
    last_blink_time = config['initial_blink_time']

    def should_blink(t, last_blink_time):
        # Start a new blink once a random delay has elapsed since the last one.
        if t - last_blink_time > random.uniform(config['minimum_blinking_delay'], config['maximum_blinking_delay']):
            return True
        return False

    def make_frame(t):
        # Select the frame image for time t (seconds).
        # NOTE(review): mutates blink state, so it assumes moviepy calls it
        # with monotonically increasing t — confirm before reordering renders.
        nonlocal last_blink_time
        frame_index = int(t * frame_rate)

        if should_blink(t, last_blink_time):
            last_blink_time = t

        # While a blink is in progress, use the blinking variants.
        if 0 <= (t - last_blink_time) <= blink_duration:
            if frame_index < len(audio_chunks):
                chunk = audio_chunks[frame_index]
                decibels = calculate_decibels(chunk)
                return open_mouth_blinking_cv if decibels > decibel_threshold else closed_mouth_blinking_cv
            else:
                return closed_mouth_blinking_cv

        if frame_index < len(audio_chunks):
            chunk = audio_chunks[frame_index]
            decibels = calculate_decibels(chunk)
            return open_mouth_cv if decibels > decibel_threshold else closed_mouth_cv
        else:
            return closed_mouth_cv

    # Assemble the silent video, then attach the ORIGINAL (unprocessed) audio.
    video_clip = VideoClip(make_frame, duration=len(audio_chunks) / frame_rate)
    audio_clip = AudioFileClip(audio_file)
    video_with_audio = video_clip.set_audio(audio_clip)

    # Write <output_path>/<audio basename>.mp4. splitext (rather than
    # split('.')[0]) drops only the real extension, so a name like
    # "take.2.wav" becomes "take.2.mp4" instead of "take.mp4".
    os.makedirs(config['output_path'], exist_ok=True)
    base_name = os.path.splitext(os.path.basename(audio_file))[0]
    output_video_path = os.path.join(config['output_path'], f"{base_name}.mp4")
    video_with_audio.write_videofile(output_video_path, fps=frame_rate, codec=config['codec'], audio_codec=config["audio_codec"])

    return output_video_path
|
120 |
+
|
121 |
+
|
122 |
+
html_content = """
|
123 |
+
<h3>How to Use</h3>
|
124 |
+
<p>Add 1-4 images in the <b>frames</b> folder and modify the paths in the <b>config.json</b> to use the images you want.<br>
|
125 |
+
Put the audios into the <b>audio</b> folder. It will create as many animations as there are audios.</p>
|
126 |
+
|
127 |
+
<h3>Frame Images:</h3>
|
128 |
+
<table>
|
129 |
+
<tr>
|
130 |
+
<th>Closed Mouth</th>
|
131 |
+
<th>Closed Mouth Blinking</th>
|
132 |
+
<th>Open Mouth</th>
|
133 |
+
<th>Open Mouth Blinking</th>
|
134 |
+
</tr>
|
135 |
+
<tr>
|
136 |
+
<td><img src="https://github.com/user-attachments/assets/3ed0c597-df0e-4165-98d4-cf978e1338bb" alt="closed_mouth" width="150"/></td>
|
137 |
+
<td><img src="https://github.com/user-attachments/assets/1296c2a7-4304-4935-b398-4ee5e1fe8a10" alt="closed_mouth_blinking" width="150"/></td>
|
138 |
+
<td><img src="https://github.com/user-attachments/assets/4715a73a-1a27-4ac9-a20b-954dde0aac0b" alt="open_mouth" width="150"/></td>
|
139 |
+
<td><img src="https://github.com/user-attachments/assets/b7d04648-9158-4dd2-889c-27c67a64e0b2" alt="open_mouth_blinking" width="150"/></td>
|
140 |
+
</tr>
|
141 |
+
</table>
|
142 |
+
|
143 |
+
|
144 |
+
<a href="https://github.com/user-attachments/assets/dcf3728c-0d3b-455d-b17e-5e9819be069b">Download the assets here</a>
|
145 |
+
"""
|
146 |
+
|
147 |
+
|
148 |
+
|
149 |
+
|
150 |
+
# Gradio interface
|
151 |
+
def gradio_interface(config_file, audio_file):
    """Gradio click handler: run the full pipeline and hand back the video path.

    Parameters:
        config_file: path of the uploaded JSON config file.
        audio_file: path of the uploaded audio file.

    Returns:
        Path of the generated video, for the gr.Video output component.
    """
    return process_audio_and_generate_video(config_file, audio_file)
|
154 |
+
|
155 |
+
# Build the Gradio UI: help panel on top, two upload inputs, one video output,
# and a single button wired to the generation pipeline.
with gr.Blocks() as demo:
    gr.HTML(html_content)

    # Input/output widgets.
    cfg_in = gr.File(label="Upload Config File (JSON)")
    audio_in = gr.Audio(label="Upload Audio File", type="filepath")
    video_out = gr.Video(label="Generated Video")

    # One click runs the whole audio -> video pipeline.
    run_btn = gr.Button("Generate Animation")
    run_btn.click(gradio_interface, [cfg_in, audio_in], video_out)

demo.launch()
|