Politrees committed
Commit 294aebf · verified · 1 Parent(s): 53a53b7

Update app.py

Files changed (1): app.py (+276 -1)
app.py CHANGED
@@ -1,4 +1,279 @@
+ import logging
+ import tempfile
+ import gradio as gr
+ import librosa
+ import librosa.display
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import soundfile as sf
+ from PIL import Image, ImageDraw, ImageFont
  import os
+ import cv2
+ from moviepy.editor import VideoFileClip, AudioFileClip

  os.system("apt-get install -y fonts-dejavu")
- os.system("python steganography.py")
+
+ DEFAULT_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
+ DEFAULT_SAMPLE_RATE = 22050
+
+ logging.basicConfig(level=logging.INFO)
+
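+ # Load the requested TrueType font, falling back to PIL's built-in default font if it is missing.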
+ def load_font(font_path, max_font_size):
+     try:
+         return ImageFont.truetype(font_path, max_font_size)
+     except IOError:
+         logging.warning(f"Font not found at {font_path}. Using default font.")
+         return ImageFont.load_default()
+     except Exception as e:
+         logging.error(f"An error occurred while loading the font: {e}")
+         raise
+
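+ # Render the text onto a black grayscale canvas; the white glyphs later serve as spectrogram magnitudes.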
+ def create_text_image(text, font, base_width=512, height=256, margin=10, letter_spacing=5):
+     draw = ImageDraw.Draw(Image.new("L", (1, 1)))
+     text_widths = [
+         draw.textbbox((0, 0), char, font=font)[2] - draw.textbbox((0, 0), char, font=font)[0]
+         for char in text
+     ]
+     text_width = sum(text_widths) + letter_spacing * (len(text) - 1)
+     text_height = (
+         draw.textbbox((0, 0), text[0], font=font)[3]
+         - draw.textbbox((0, 0), text[0], font=font)[1]
+     )
+
+     width = max(base_width, text_width + margin * 2)
+     height = max(height, text_height + margin * 2)
+
+     image = Image.new("L", (width, height), "black")
+     draw = ImageDraw.Draw(image)
+
+     text_start_x = (width - text_width) // 2
+     text_start_y = (height - text_height) // 2
+
+     current_x = text_start_x
+     for char, char_width in zip(text, text_widths):
+         draw.text((current_x, text_start_y), char, font=font, fill="white")
+         current_x += char_width + letter_spacing
+
+     return np.array(image)
+
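+ # Treat the grayscale image as a magnitude spectrogram and invert it to a waveform with Griffin-Lim.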
+ def spectrogram_image_to_audio(image, sr=DEFAULT_SAMPLE_RATE):
+     flipped_image = np.flipud(image)
+     S = flipped_image.astype(np.float32) / 255.0 * 100.0
+     y = librosa.griffinlim(S)
+     return y
+
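+ # Text tab pipeline: render the text image, synthesize audio from it, and save a mel-spectrogram preview.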
+ def create_audio_with_spectrogram(text, base_width, height, max_font_size, margin, letter_spacing):
+     font = load_font(DEFAULT_FONT_PATH, max_font_size)
+     spec_image = create_text_image(text, font, base_width, height, margin, letter_spacing)
+     y = spectrogram_image_to_audio(spec_image)
+
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
+         audio_path = temp_audio.name
+         sf.write(audio_path, y, DEFAULT_SAMPLE_RATE)
+
+     S = librosa.feature.melspectrogram(y=y, sr=DEFAULT_SAMPLE_RATE)
+     S_dB = librosa.power_to_db(S, ref=np.max)
+     plt.figure(figsize=(10, 4))
+     librosa.display.specshow(S_dB, sr=DEFAULT_SAMPLE_RATE, x_axis="time", y_axis="mel")
+     plt.axis("off")
+     plt.tight_layout(pad=0)
+
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_spectrogram:
+         spectrogram_path = temp_spectrogram.name
+         plt.savefig(spectrogram_path, bbox_inches="tight", pad_inches=0, transparent=True)
+     plt.close()
+
+     return audio_path, spectrogram_path
+
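+ # Audio tab: compute and save a mel-spectrogram image of an uploaded audio file.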
+ def display_audio_spectrogram(audio_path):
+     y, sr = librosa.load(audio_path, sr=None)
+     S = librosa.feature.melspectrogram(y=y, sr=sr)
+     S_dB = librosa.power_to_db(S, ref=np.max)
+
+     plt.figure(figsize=(10, 4))
+     librosa.display.specshow(S_dB, sr=sr, x_axis="time", y_axis="mel")
+     plt.axis("off")
+     plt.tight_layout(pad=0)
+
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_spectrogram:
+         spectrogram_path = temp_spectrogram.name
+         plt.savefig(spectrogram_path, bbox_inches="tight", pad_inches=0, transparent=True)
+     plt.close()
+     return spectrogram_path
+
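+ # Image tab: convert an uploaded image straight into audio via the same spectrogram inversion.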
+ def image_to_spectrogram_audio(image_path, sr=DEFAULT_SAMPLE_RATE):
+     image = Image.open(image_path).convert("L")
+     image = np.array(image)
+     y = spectrogram_image_to_audio(image, sr)
+
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
+         img2audio_path = temp_audio.name
+         sf.write(img2audio_path, y, sr)
+     return img2audio_path
+
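+ # Thin wrappers that adapt the functions above to Gradio event handlers.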
+ def gradio_interface_fn(text, base_width, height, max_font_size, margin, letter_spacing):
+     audio_path, spectrogram_path = create_audio_with_spectrogram(text, base_width, height, max_font_size, margin, letter_spacing)
+     return audio_path, spectrogram_path
+
+ def gradio_image_to_audio_fn(upload_image):
+     return image_to_spectrogram_audio(upload_image)
+
+ def gradio_decode_fn(upload_audio):
+     return display_audio_spectrogram(upload_audio)
+
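+ # Pull the audio track out of a video; returns None (and logs the error) if the clip has no audio.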
+ def extract_audio(video_path):
+     try:
+         video = VideoFileClip(video_path)
+         if video.audio is None:
+             raise ValueError("No audio found in the video")
+         audio_path = "extracted_audio.wav"
+         video.audio.write_audiofile(audio_path)
+         return audio_path
+     except Exception as e:
+         logging.error(f"Failed to extract audio: {e}")
+         return None
+
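+ # Read every frame of the video into memory with OpenCV.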
+ def extract_frames(video_path):
+     try:
+         video = cv2.VideoCapture(video_path)
+         frames = []
+         success, frame = video.read()
+         while success:
+             frames.append(frame)
+             success, frame = video.read()
+         video.release()
+         return frames
+     except Exception as e:
+         logging.error(f"Failed to extract frames: {e}")
+         return None
+
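+ # Sonify a single video frame: grayscale pixel values become spectrogram magnitudes, inverted with Griffin-Lim.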
+ def frame_to_spectrogram(frame, sr=DEFAULT_SAMPLE_RATE):
+     gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+     S = np.flipud(gray_frame.astype(np.float32) / 255.0 * 100.0)
+     y = librosa.griffinlim(S)
+     return y
+
+ def save_audio(y, sr=DEFAULT_SAMPLE_RATE):
+     audio_path = 'output_frame_audio.wav'
+     sf.write(audio_path, y, sr)
+     return audio_path
+
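+ # Plot a mel spectrogram to a PNG in the temp directory so it can be reloaded as a video frame.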
+ def save_spectrogram_image(S, frame_number, temp_dir):
+     plt.figure(figsize=(10, 4))
+     librosa.display.specshow(S)
+     plt.tight_layout()
+     image_path = os.path.join(temp_dir, f'spectrogram_frame_{frame_number}.png')
+     plt.savefig(image_path)
+     plt.close()
+     return image_path
+
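+ # For each frame: sonify it, compute the mel spectrogram of that audio, and use the rendered plot as the new frame.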
+ def process_video_frames(frames, sr=DEFAULT_SAMPLE_RATE, temp_dir=None):
+     processed_frames = []
+     total_frames = len(frames)
+     for i, frame in enumerate(frames):
+         y = frame_to_spectrogram(frame, sr)
+         S = librosa.feature.melspectrogram(y=y, sr=sr)
+         image_path = save_spectrogram_image(S, i, temp_dir)
+         processed_frame = cv2.imread(image_path)
+         processed_frames.append(processed_frame)
+     return processed_frames
+
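+ # Re-encode the processed spectrogram frames into an mp4 with OpenCV.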
+ def save_video_from_frames(frames, output_path, fps=30):
+     height, width, layers = frames[0].shape
+     video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
+     for frame in frames:
+         video.write(frame)
+     video.release()
+
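+ # Mux the original audio track back onto the processed video with moviepy.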
+ def add_audio_to_video(video_path, audio_path, output_path):
+     try:
+         video = VideoFileClip(video_path)
+         audio = AudioFileClip(audio_path)
+         final_video = video.set_audio(audio)
+         final_video.write_videofile(output_path, codec='libx264', audio_codec='aac')
+     except Exception as e:
+         logging.error(f"Failed to add audio to video: {e}")
+
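+ # Video tab pipeline: trim to the first 10 seconds if needed, extract audio and frames, spectrogram-ify the frames, then remux.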
+ def process_video(video_path):
+     try:
+         video = VideoFileClip(video_path)
+         if video.duration > 10:
+             video = video.subclip(0, 10)
+             temp_trimmed_video_path = "trimmed_video.mp4"
+             video.write_videofile(temp_trimmed_video_path, codec='libx264')
+             video_path = temp_trimmed_video_path
+     except Exception as e:
+         return f"Failed to load video: {e}"
+
+     audio_path = extract_audio(video_path)
+     if audio_path is None:
+         return "Failed to extract audio from video."
+     frames = extract_frames(video_path)
+     if frames is None:
+         return "Failed to extract frames from video."
+
+     with tempfile.TemporaryDirectory() as temp_dir:
+         processed_frames = process_video_frames(frames, temp_dir=temp_dir)
+         temp_video_path = os.path.join(temp_dir, 'processed_video.mp4')
+         save_video_from_frames(processed_frames, temp_video_path)
+         output_video_path = 'output_video_with_audio.mp4'
+         add_audio_to_video(temp_video_path, audio_path, output_video_path)
+     return output_video_path
+
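+ # Build the four-tab Gradio UI and wire each button to its handler.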
+ def create_gradio_interface():
+     with gr.Blocks(title="Audio Steganography", css="footer{display:none !important}", theme=gr.themes.Soft(primary_hue="green", secondary_hue="green", spacing_size="sm", radius_size="lg")) as txt2spec:
+         with gr.Tab("Text to Spectrogram"):
+             with gr.Group():
+                 text = gr.Textbox(lines=2, placeholder="Enter your text:", label="Text", info="Enter the text you want to convert to audio.")
+                 with gr.Row(variant="panel"):
+                     base_width = gr.Slider(value=512, label="Image Width", visible=False)
+                     height = gr.Slider(value=256, label="Image Height", visible=False)
+                     max_font_size = gr.Slider(minimum=10, maximum=130, step=5, value=80, label="Font size")
+                     margin = gr.Slider(minimum=0, maximum=50, step=1, value=10, label="Indent")
+                     letter_spacing = gr.Slider(minimum=0, maximum=50, step=1, value=5, label="Letter spacing")
+                 generate_button = gr.Button("Generate", variant="primary", size="lg")
+
+             with gr.Column(variant="panel"):
+                 with gr.Group():
+                     output_audio = gr.Audio(type="filepath", label="Generated audio")
+                     output_spectrogram = gr.Image(type="filepath", label="Spectrogram")
+
+             generate_button.click(gradio_interface_fn, inputs=[text, base_width, height, max_font_size, margin, letter_spacing], outputs=[output_audio, output_spectrogram])
+
+         with gr.Tab("Image to Spectrogram"):
+             with gr.Group():
+                 with gr.Column():
+                     upload_image = gr.Image(type="filepath", label="Upload image")
+                     convert_button = gr.Button("Convert to audio", variant="primary", size="lg")
+
+             with gr.Column(variant="panel"):
+                 output_audio_from_image = gr.Audio(type="filepath", label="Generated audio")
+
+             convert_button.click(gradio_image_to_audio_fn, inputs=[upload_image], outputs=[output_audio_from_image])
+
+         with gr.Tab("Audio to Spectrogram"):
+             with gr.Group():
+                 with gr.Column():
+                     upload_audio = gr.Audio(type="filepath", label="Upload audio", scale=3)
+                     decode_button = gr.Button("Show spectrogram", variant="primary", size="lg")
+
+             with gr.Column(variant="panel"):
+                 decoded_image = gr.Image(type="filepath", label="Audio Spectrogram")
+
+             decode_button.click(gradio_decode_fn, inputs=[upload_audio], outputs=[decoded_image])
+
+         with gr.Tab("Video to Spectrogram"):
+             with gr.Group():
+                 video_input = gr.Video(label="Upload video")
+                 generate_button = gr.Button("Generate", variant="primary", size="lg")
+
+             with gr.Column(variant="panel"):
+                 video_output = gr.Video(label="Video Spectrogram")
+
+             generate_button.click(process_video, inputs=[video_input], outputs=[video_output])
+
+     return txt2spec
+
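+ # Entry point: build the interface and launch it with a public share link.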
+ if __name__ == "__main__":
+     txt2spec = create_gradio_interface()
+     txt2spec.launch(share=True)