Blane187 committed
Commit 96b6a47
1 Parent(s): 3df09ed

Create app.py

Files changed (1)
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
+ import os, random, json
+ import numpy as np
+ from pydub import AudioSegment
+ from pydub.utils import make_chunks
+ from pydub.effects import compress_dynamic_range
+ from PIL import Image
+ import cv2
+ from moviepy.editor import VideoClip, AudioFileClip
+ import gradio as gr
+
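+ # Dependency note (an assumption, since this commit pins nothing): pydub and
+ # moviepy both rely on an ffmpeg binary being on PATH, on top of
+ # `pip install pydub moviepy opencv-python pillow gradio`.
+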
+ # Load configuration from a JSON file
+ def load_config(config_path):
+     with open(config_path, 'r') as config_file:
+         return json.load(config_file)
+
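+ # For reference, a config.json for this script might look like the following
+ # sketch. The key names are exactly the ones read below; the values are
+ # illustrative guesses, not shipped with this commit:
+ # {
+ #     "frame_paths": {
+ #         "closed_mouth": "frames/closed_mouth.png",
+ #         "open_mouth": "frames/open_mouth.png",
+ #         "closed_mouth_blinking": "frames/closed_mouth_blinking.png",
+ #         "open_mouth_blinking": "frames/open_mouth_blinking.png"
+ #     },
+ #     "background_color": [255, 255, 255, 255],
+ #     "frame_rate": 30,
+ #     "frame_duration_ms": 1000,
+ #     "dynamic_threshold": 1,
+ #     "decibel_threshold": -25.0,
+ #     "blink_duration": 0.15,
+ #     "initial_blink_time": 0.0,
+ #     "minimum_blinking_delay": 2.0,
+ #     "maximum_blinking_delay": 6.0,
+ #     "output_path": "output",
+ #     "codec": "libx264",
+ #     "audio_codec": "aac"
+ # }
+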
+ def process_audio_and_generate_video(config_path, audio_file):
+     config = load_config(config_path)
+
+     # Load the four character frames; convert to RGBA so alpha_composite
+     # below accepts them even if a source image lacks an alpha channel
+     closed_mouth_img = Image.open(config['frame_paths']['closed_mouth']).convert('RGBA')
+     open_mouth_img = Image.open(config['frame_paths']['open_mouth']).convert('RGBA')
+     closed_mouth_blinking_img = Image.open(config['frame_paths']['closed_mouth_blinking']).convert('RGBA')
+     open_mouth_blinking_img = Image.open(config['frame_paths']['open_mouth_blinking']).convert('RGBA')
+
+     # Create a background with the color from config
+     background_color = tuple(config['background_color'])
+     background = Image.new('RGBA', closed_mouth_img.size, background_color)
+
+     # Composite the frames onto the background
+     closed_mouth_img = Image.alpha_composite(background, closed_mouth_img)
+     open_mouth_img = Image.alpha_composite(background, open_mouth_img)
+     closed_mouth_blinking_img = Image.alpha_composite(background, closed_mouth_blinking_img)
+     open_mouth_blinking_img = Image.alpha_composite(background, open_mouth_blinking_img)
+
+     # Convert to RGB numpy arrays for the video pipeline
+     closed_mouth_cv = cv2.cvtColor(np.array(closed_mouth_img), cv2.COLOR_RGBA2RGB)
+     open_mouth_cv = cv2.cvtColor(np.array(open_mouth_img), cv2.COLOR_RGBA2RGB)
+     closed_mouth_blinking_cv = cv2.cvtColor(np.array(closed_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
+     open_mouth_blinking_cv = cv2.cvtColor(np.array(open_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
+
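+     # As an illustration, "background_color": [0, 255, 0, 255] in the config
+     # (a hypothetical value) would put the character on a solid green backdrop,
+     # handy for chroma-keying the finished video.
+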
+     # Timing parameters: frame_duration_ms is the slice of audio that backs one
+     # video frame (config['frame_duration_ms'] is the span divided across frames)
+     frame_rate = config['frame_rate']
+     frame_duration_ms = config['frame_duration_ms'] // frame_rate
+
+     # Load the audio
+     audio = AudioSegment.from_file(audio_file)
+
+     # Apply dynamic-range compression so loudness is more uniform before thresholding
+     compressed_audio = compress_dynamic_range(audio, threshold=-20.0, ratio=8.0, attack=1.0, release=10.0)
+
+     # Normalize audio to a fixed target loudness
+     target_dBFS = -10.0
+     change_in_dBFS = target_dBFS - compressed_audio.dBFS
+     normalized_audio = compressed_audio.apply_gain(change_in_dBFS)
+
+     # Split the audio into chunks of the same duration as the frames
+     audio_chunks = make_chunks(normalized_audio, frame_duration_ms)
+
+     # Loudness of a chunk in dBFS (pydub returns -inf for pure silence)
+     def calculate_decibels(chunk):
+         return chunk.dBFS
+
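+     # Example of the timing math above: with frame_duration_ms = 1000 and
+     # frame_rate = 30 in the config (illustrative values), each chunk covers
+     # 1000 // 30 = 33 ms, so a 10-second clip yields roughly 300 chunks, one
+     # per rendered frame.
+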
+     # Choose the mouth-open threshold: either dynamic (relative to the clip's
+     # average loudness) or a fixed value from the config
+     if config["dynamic_threshold"] == 1:
+         # Note: a single silent chunk (dBFS == -inf) drags this average to -inf
+         average_dBFS = sum(chunk.dBFS for chunk in audio_chunks) / len(audio_chunks)
+         decibel_threshold = average_dBFS + 4  # open the mouth only above average loudness
+     else:
+         decibel_threshold = config['decibel_threshold']
+
+     # Blink state
+     blink_duration = config['blink_duration']
+     last_blink_time = config['initial_blink_time']
+
+     # Decide whether to start a new blink; the random delay is re-rolled on
+     # every call, so blinks arrive at irregular intervals within the bounds
+     def should_blink(t, last_blink_time):
+         return t - last_blink_time > random.uniform(config['minimum_blinking_delay'], config['maximum_blinking_delay'])
+
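+     # With, say, minimum_blinking_delay = 2.0 and maximum_blinking_delay = 6.0
+     # (illustrative values), a new blink starts somewhere between 2 and 6
+     # seconds after the previous one, at a random point each time.
+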
+     # Frame generator: moviepy calls this with a time t in seconds and expects
+     # the frame image (a numpy array) for that instant
+     def make_frame(t):
+         nonlocal last_blink_time
+         frame_index = int(t * frame_rate)
+
+         if should_blink(t, last_blink_time):
+             last_blink_time = t
+
+         # For blink_duration seconds after a blink starts, use the blinking frames
+         if 0 <= (t - last_blink_time) <= blink_duration:
+             if frame_index < len(audio_chunks):
+                 chunk = audio_chunks[frame_index]
+                 decibels = calculate_decibels(chunk)
+                 return open_mouth_blinking_cv if decibels > decibel_threshold else closed_mouth_blinking_cv
+             else:
+                 return closed_mouth_blinking_cv
+
+         # Otherwise pick between open and closed mouth based on chunk loudness
+         if frame_index < len(audio_chunks):
+             chunk = audio_chunks[frame_index]
+             decibels = calculate_decibels(chunk)
+             return open_mouth_cv if decibels > decibel_threshold else closed_mouth_cv
+         else:
+             return closed_mouth_cv
+
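+     # Caveat: the nonlocal blink state assumes make_frame is called with
+     # (roughly) increasing t, which is what moviepy does during a plain
+     # write_videofile render; seeking backwards would desynchronize blinks.
+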
+     # Build the silent video clip; its duration matches the chunked audio
+     video_clip = VideoClip(make_frame, duration=len(audio_chunks) / frame_rate)
+
+     # Attach the original audio to the rendered video
+     audio_clip = AudioFileClip(audio_file)
+     video_with_audio = video_clip.set_audio(audio_clip)
+
+     # Write the final video, named after the audio file, into the output folder
+     os.makedirs(config['output_path'], exist_ok=True)
+     output_video_path = os.path.join(config['output_path'], f"{os.path.basename(audio_file).split('.')[0]}.mp4")
+     video_with_audio.write_videofile(output_video_path, fps=frame_rate, codec=config['codec'], audio_codec=config["audio_codec"])
+
+     return output_video_path
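+
+ # Direct (non-UI) usage sketch, with hypothetical paths:
+ #   process_audio_and_generate_video("config.json", "audio/hello.wav")
+ # would write and return "output/hello.mp4", given "output_path": "output".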
+
+
+ html_content = """
+ <h3>How to Use</h3>
+ <p>Add 1-4 images to the <b>frames</b> folder and edit the paths in <b>config.json</b> to point at the images you want to use.<br>
+ Put the audio files into the <b>audio</b> folder; one animation is created per audio file.</p>
+
+ <h3>Frame Images:</h3>
+ <table>
+   <tr>
+     <th>Closed Mouth</th>
+     <th>Closed Mouth Blinking</th>
+     <th>Open Mouth</th>
+     <th>Open Mouth Blinking</th>
+   </tr>
+   <tr>
+     <td><img src="https://github.com/user-attachments/assets/3ed0c597-df0e-4165-98d4-cf978e1338bb" alt="closed_mouth" width="150"/></td>
+     <td><img src="https://github.com/user-attachments/assets/1296c2a7-4304-4935-b398-4ee5e1fe8a10" alt="closed_mouth_blinking" width="150"/></td>
+     <td><img src="https://github.com/user-attachments/assets/4715a73a-1a27-4ac9-a20b-954dde0aac0b" alt="open_mouth" width="150"/></td>
+     <td><img src="https://github.com/user-attachments/assets/b7d04648-9158-4dd2-889c-27c67a64e0b2" alt="open_mouth_blinking" width="150"/></td>
+   </tr>
+ </table>
+
+ <a href="https://github.com/user-attachments/assets/dcf3728c-0d3b-455d-b17e-5e9819be069b">Download the assets here</a>
+ """
+
+
+ # Gradio interface: pass the uploaded config and audio straight to the generator
+ def gradio_interface(config_file, audio_file):
+     return process_audio_and_generate_video(config_file, audio_file)
+
+ with gr.Blocks() as demo:
+     gr.HTML(html_content)
+     config_file_input = gr.File(label="Upload Config File (JSON)", type="filepath")
+     audio_file_input = gr.Audio(label="Upload Audio File", type="filepath")
+     output_video = gr.Video(label="Generated Video")
+
+     generate_button = gr.Button("Generate Animation")
+     generate_button.click(gradio_interface, [config_file_input, audio_file_input], output_video)
+
+ demo.launch()
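+
+ # Note: demo.launch() uses Gradio defaults here; share=True (a standard
+ # Gradio option) could be passed to get a temporary public link when
+ # running outside a hosted Space.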