File size: 12,056 Bytes
19b4ae5
 
 
1d000a8
 
19b4ae5
 
1d000a8
19b4ae5
cf21461
 
 
5145c3f
 
 
1d000a8
f3b050a
 
8fb2a96
5145c3f
8fb2a96
5145c3f
8fb2a96
 
19b4ae5
 
 
 
8fb2a96
19b4ae5
8fb2a96
 
 
 
19b4ae5
8fb2a96
 
 
 
1d000a8
5924dfb
 
1d000a8
19b4ae5
1d000a8
5145c3f
19b4ae5
 
5145c3f
19b4ae5
 
 
 
5145c3f
8fb2a96
1d000a8
5145c3f
1d000a8
 
2b77930
1d000a8
 
76f1d4e
8fb2a96
 
1d000a8
19b4ae5
 
f3b050a
 
19b4ae5
2b77930
4aade4a
 
19b4ae5
 
4aade4a
19b4ae5
 
f3b050a
76f1d4e
4aade4a
19b4ae5
4aade4a
1d000a8
 
5145c3f
2b77930
1d000a8
 
 
19b4ae5
 
4aade4a
1d000a8
76f1d4e
 
 
1d000a8
76f1d4e
1d000a8
bc96c8f
19b4ae5
bc96c8f
 
19b4ae5
 
bc96c8f
 
 
 
0151e44
 
f3b050a
 
bc96c8f
 
f3b050a
 
 
 
cf21461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fb2a96
 
 
c9496c6
8fb2a96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d000a8
8fb2a96
 
5145c3f
8fb2a96
1d000a8
cf21461
8fb2a96
 
 
 
1d000a8
8fb2a96
 
5145c3f
8fb2a96
1d000a8
cf21461
 
 
 
 
 
 
 
 
 
8fb2a96
5145c3f
8fb2a96
 
cf21461
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
import logging
import tempfile
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from PIL import Image, ImageDraw, ImageFont
import os
import cv2
from moviepy.editor import VideoFileClip, AudioFileClip

# TrueType font used to render text; load_font() falls back to Pillow's
# built-in default font when this file cannot be opened.
DEFAULT_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
# Sample rate used when writing generated audio and as the default `sr` argument.
DEFAULT_SAMPLE_RATE = 22050

# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)

def load_font(font_path, max_font_size):
    """Return the TrueType font at *font_path*, or Pillow's default font.

    Args:
        font_path: Path of the TrueType font file to load.
        max_font_size: Size passed to ``ImageFont.truetype``.

    Returns:
        The loaded font, or ``ImageFont.load_default()`` when opening the
        font file raises ``IOError``.

    Raises:
        Exception: Any non-``IOError`` failure is logged and re-raised.
    """
    try:
        font = ImageFont.truetype(font_path, max_font_size)
    except IOError:
        # A missing/unreadable font file is recoverable: warn and fall back.
        logging.warning(f"Font not found at {font_path}. Using default font.")
        font = ImageFont.load_default()
    except Exception as err:
        logging.error(f"An error occurred while loading the font: {err}")
        raise
    return font

def create_text_image(text, font, base_width=512, height=256, margin=10, letter_spacing=5):
    """Render *text* as white glyphs centred on a black grayscale canvas.

    Characters are drawn one at a time so that *letter_spacing* extra pixels
    can be inserted between neighbours.

    Args:
        text: String to render. An empty string yields an all-black canvas.
        font: Pillow font object used for measuring and drawing.
        base_width: Minimum canvas width in pixels.
        height: Minimum canvas height in pixels.
        margin: Padding added on each side when the text would not fit the
            minimum canvas size.
        letter_spacing: Extra pixels inserted between consecutive characters.

    Returns:
        The rendered image as a NumPy array (mode ``"L"``).
    """
    # A throwaway 1x1 image is enough to obtain a drawing context for measuring.
    probe = ImageDraw.Draw(Image.new("L", (1, 1)))

    # Measure every glyph exactly once; each box is (left, top, right, bottom).
    glyph_boxes = [probe.textbbox((0, 0), char, font=font) for char in text]
    text_widths = [right - left for left, _top, right, _bottom in glyph_boxes]

    # max(..., 0) keeps the spacing term from going negative for empty text.
    text_width = sum(text_widths) + letter_spacing * max(len(text) - 1, 0)
    # Use the tallest glyph rather than only the first character, so a short
    # leading glyph (e.g. "." or a space) cannot under-size the layout.
    # default=0 covers empty text, which previously raised IndexError.
    text_height = max(
        (bottom - top for _left, top, _right, bottom in glyph_boxes),
        default=0,
    )

    # Grow the canvas beyond the requested minimum when the text needs it.
    width = max(base_width, text_width + margin * 2)
    height = max(height, text_height + margin * 2)

    image = Image.new("L", (width, height), "black")
    draw = ImageDraw.Draw(image)

    current_x = (width - text_width) // 2
    text_start_y = (height - text_height) // 2

    for char, char_width in zip(text, text_widths):
        draw.text((current_x, text_start_y), char, font=font, fill="white")
        current_x += char_width + letter_spacing

    return np.array(image)

def spectrogram_image_to_audio(image, sr=DEFAULT_SAMPLE_RATE):
    """Treat a grayscale image as a magnitude spectrogram and invert it to audio.

    Args:
        image: 2-D array of pixel intensities (scaled here as 0-255 values).
        sr: Accepted for signature symmetry with callers; not used by the
            reconstruction itself.

    Returns:
        The waveform produced by ``librosa.griffinlim``.
    """
    # Flip vertically so the image's top row becomes the last row of the
    # matrix handed to Griffin-Lim.
    upright = np.flipud(image)
    # Map pixel values from the 0-255 range onto 0-100.
    magnitude = upright.astype(np.float32) / 255.0 * 100.0
    return librosa.griffinlim(magnitude)

def create_audio_with_spectrogram(text, base_width, height, max_font_size, margin, letter_spacing):
    """Turn *text* into audio and render that audio's mel spectrogram.

    The text is drawn into an image, the image is inverted to a waveform, the
    waveform is written to a temporary ``.wav`` file, and a mel spectrogram of
    the waveform is saved to a temporary ``.png`` file.

    Args:
        text: Text to embed.
        base_width: Minimum width of the rendered text image.
        height: Minimum height of the rendered text image.
        max_font_size: Font size used to load ``DEFAULT_FONT_PATH``.
        margin: Padding around the text in the rendered image.
        letter_spacing: Extra pixels between characters.

    Returns:
        ``(audio_path, spectrogram_path)``. Both files are created with
        ``delete=False``, so they are not removed automatically.
    """
    font = load_font(DEFAULT_FONT_PATH, max_font_size)
    spec_image = create_text_image(text, font, base_width, height, margin, letter_spacing)
    y = spectrogram_image_to_audio(spec_image)

    # Reserve a unique file name, then let soundfile write to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name
    sf.write(audio_path, y, DEFAULT_SAMPLE_RATE)

    S = librosa.feature.melspectrogram(y=y, sr=DEFAULT_SAMPLE_RATE)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Work on an explicit figure/axes pair and close it in ``finally`` so the
    # figure is released even when plotting or saving raises.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        librosa.display.specshow(
            S_dB, sr=DEFAULT_SAMPLE_RATE, x_axis="time", y_axis="mel", ax=ax
        )
        ax.axis("off")
        fig.tight_layout(pad=0)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_spectrogram:
            spectrogram_path = temp_spectrogram.name
        fig.savefig(spectrogram_path, bbox_inches="tight", pad_inches=0, transparent=True)
    finally:
        plt.close(fig)

    return audio_path, spectrogram_path

def display_audio_spectrogram(audio_path):
    """Render the mel spectrogram of an audio file to a temporary PNG.

    Args:
        audio_path: Path of the audio file to load (at its native sample rate).

    Returns:
        Path of the saved ``.png`` file. It is created with ``delete=False``,
        so it is not removed automatically.
    """
    y, sr = librosa.load(audio_path, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Use an explicit figure/axes pair and close it in ``finally`` so the
    # figure is released even when plotting or saving raises.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        librosa.display.specshow(S_dB, sr=sr, x_axis="time", y_axis="mel", ax=ax)
        ax.axis("off")
        fig.tight_layout(pad=0)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_spectrogram:
            spectrogram_path = temp_spectrogram.name
        fig.savefig(spectrogram_path, bbox_inches="tight", pad_inches=0, transparent=True)
    finally:
        plt.close(fig)
    return spectrogram_path

def image_to_spectrogram_audio(image_path, sr=DEFAULT_SAMPLE_RATE):
    """Convert an image file into audio whose spectrogram resembles the image.

    Args:
        image_path: Path of the image to read; it is converted to grayscale.
        sr: Sample rate used when writing the resulting ``.wav`` file.

    Returns:
        Path of the temporary ``.wav`` file. It is created with
        ``delete=False``, so it is not removed automatically.
    """
    # The context manager closes the underlying image file as soon as the
    # pixels have been copied into the array.
    with Image.open(image_path) as source:
        image = np.array(source.convert("L"))
    y = spectrogram_image_to_audio(image, sr)

    # Reserve a unique file name, then let soundfile write to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        img2audio_path = temp_audio.name
    sf.write(img2audio_path, y, sr)
    return img2audio_path

def gradio_interface_fn(text, base_width, height, max_font_size, margin, letter_spacing):
    """Click handler for the text tab: forward the inputs unchanged.

    Returns:
        The ``(audio_path, spectrogram_path)`` pair produced by
        ``create_audio_with_spectrogram``.
    """
    audio_path, spectrogram_path = create_audio_with_spectrogram(
        text,
        base_width,
        height,
        max_font_size,
        margin,
        letter_spacing,
    )
    generated_files = (audio_path, spectrogram_path)
    return generated_files

def gradio_image_to_audio_fn(upload_image):
    """Click handler for the image tab: return the audio path for *upload_image*."""
    generated_audio_path = image_to_spectrogram_audio(upload_image)
    return generated_audio_path

def gradio_decode_fn(upload_audio):
    """Click handler for the audio tab: return the spectrogram image path for *upload_audio*."""
    rendered_image_path = display_audio_spectrogram(upload_audio)
    return rendered_image_path


def display_progress(percent, message, progress=gr.Progress()):
    """Report progress by calling *progress* with *percent* and ``desc=message``.

    Args:
        percent: Progress value; callers in this file pass fractions in 0-1.
        message: Text forwarded as the ``desc`` keyword.
        progress: Callable progress tracker; defaults to a ``gr.Progress()``
            instance created when the function is defined.
    """
    progress(percent, desc=message)

def extract_audio(video_path, progress):
    """Write the audio track of a video to ``extracted_audio.wav``.

    Args:
        video_path: Path of the video file to read.
        progress: Progress tracker forwarded to ``display_progress``.

    Returns:
        The path of the written audio file, or ``None`` when the video has no
        audio track or extraction fails for any reason.
    """
    display_progress(0.1, "Extracting audio from video", progress)
    video = None
    try:
        video = VideoFileClip(video_path)
        if video.audio is None:
            raise ValueError("No audio found in the video")
        audio_path = "extracted_audio.wav"
        video.audio.write_audiofile(audio_path)
        display_progress(0.2, "Audio extracted", progress)
        return audio_path
    except Exception as e:
        # Best-effort contract: report the failure and signal it with None,
        # but keep the traceback in the log instead of discarding it.
        logging.exception("Failed to extract audio from %s", video_path)
        display_progress(0.2, f"Failed to extract audio: {e}", progress)
        return None
    finally:
        # Release the clip's file readers on both success and failure.
        if video is not None:
            video.close()

def extract_frames(video_path, progress):
    """Read every frame of a video into memory.

    Args:
        video_path: Path of the video file to read.
        progress: Progress tracker forwarded to ``display_progress``.

    Returns:
        A non-empty list of frames as returned by ``cv2.VideoCapture.read``,
        or ``None`` when the video cannot be opened, yields no frames, or
        reading fails.
    """
    display_progress(0.3, "Extracting frames from video", progress)
    video = None
    try:
        video = cv2.VideoCapture(video_path)
        # An unopened capture would otherwise silently produce an empty list.
        if not video.isOpened():
            raise ValueError("Could not open the video")
        frames = []
        success, frame = video.read()
        while success:
            frames.append(frame)
            success, frame = video.read()
        # Downstream code indexes frames[0], so an empty result is a failure.
        if not frames:
            raise ValueError("No frames found in the video")
        display_progress(0.4, "Frames extracted", progress)
        return frames
    except Exception as e:
        display_progress(0.4, f"Failed to extract frames: {e}", progress)
        return None
    finally:
        # Release the capture on both success and failure.
        if video is not None:
            video.release()

def frame_to_spectrogram(frame, sr=DEFAULT_SAMPLE_RATE):
    """Invert a BGR video frame, treated as a magnitude spectrogram, to audio.

    Despite the name, the return value is the waveform from
    ``librosa.griffinlim``, not a spectrogram.

    Args:
        frame: Colour frame in BGR channel order.
        sr: Accepted for signature symmetry with callers; not used by the
            reconstruction itself.

    Returns:
        The reconstructed waveform.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Map pixel values from the 0-255 range onto 0-100.
    scaled = grayscale.astype(np.float32) / 255.0 * 100.0
    # Flip vertically so the frame's top row becomes the matrix's last row.
    magnitude = np.flipud(scaled)
    return librosa.griffinlim(magnitude)

def save_audio(y, sr=DEFAULT_SAMPLE_RATE):
    """Write waveform *y* at sample rate *sr* to a fixed file and return its path."""
    destination = 'output_frame_audio.wav'
    sf.write(destination, y, sr)
    return destination

def save_spectrogram_image(S, frame_number, temp_dir):
    """Plot spectrogram *S* and save it as a PNG inside *temp_dir*.

    Args:
        S: Spectrogram matrix passed to ``librosa.display.specshow``.
        frame_number: Index used in the output file name.
        temp_dir: Directory in which the image is written.

    Returns:
        Path of the saved image, ``spectrogram_frame_<frame_number>.png``.
    """
    image_path = os.path.join(temp_dir, f'spectrogram_frame_{frame_number}.png')
    # This runs once per video frame; closing the explicit figure in
    # ``finally`` ensures a failing frame cannot leave an open figure behind.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        librosa.display.specshow(S, ax=ax)
        fig.tight_layout()
        fig.savefig(image_path)
    finally:
        plt.close(fig)
    return image_path

def process_video_frames(frames, sr=DEFAULT_SAMPLE_RATE, temp_dir=None, progress=gr.Progress()):
    """Replace each frame with an image of the mel spectrogram of its audio.

    Every frame is inverted to audio, the audio's mel spectrogram is plotted
    to a PNG inside *temp_dir*, and that PNG is read back as the new frame.

    Args:
        frames: Sequence of BGR frames.
        sr: Sample rate forwarded to ``frame_to_spectrogram`` and
            ``librosa.feature.melspectrogram``.
        temp_dir: Directory for the intermediate PNG files. When ``None`` a
            temporary directory is created for the duration of the call.
        progress: Progress tracker forwarded to ``display_progress``.

    Returns:
        List of processed frames as returned by ``cv2.imread``.
    """
    if temp_dir is None:
        # The default previously crashed in os.path.join(None, ...). The
        # images are loaded into memory before the directory is removed.
        with tempfile.TemporaryDirectory() as scratch_dir:
            return process_video_frames(frames, sr=sr, temp_dir=scratch_dir, progress=progress)

    processed_frames = []
    total_frames = len(frames)
    for i, frame in enumerate(frames):
        y = frame_to_spectrogram(frame, sr)
        S = librosa.feature.melspectrogram(y=y, sr=sr)
        image_path = save_spectrogram_image(S, i, temp_dir)
        processed_frame = cv2.imread(image_path)
        processed_frames.append(processed_frame)
        # Advance linearly from 0.5 to the 0.8 reported after the loop. The
        # earlier int(...) truncated the increment to 0 for every frame.
        fraction_done = (i + 1) / total_frames
        display_progress(0.5 + fraction_done * 0.3, f"Frame processing {i + 1}/{total_frames}", progress)
    display_progress(0.8, "All frames processed", progress)
    return processed_frames

def save_video_from_frames(frames, output_path, fps=30):
    """Encode *frames* into an ``mp4v`` video file at *output_path*.

    Args:
        frames: Non-empty sequence of 3-dimensional frame arrays; the first
            frame determines the output size.
        output_path: Destination video file.
        fps: Frame rate of the written video.
    """
    frame_height, frame_width, _channels = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # VideoWriter expects the frame size as (width, height).
    writer = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
    for image in frames:
        writer.write(image)
    writer.release()

def add_audio_to_video(video_path, audio_path, output_path, progress):
    """Mux an audio file onto a video and write the result to *output_path*.

    Failures are reported through *progress* and logged; they are not raised.

    Args:
        video_path: Path of the (silent) input video.
        audio_path: Path of the audio file to attach.
        output_path: Destination file, encoded with libx264 video and AAC audio.
        progress: Progress tracker forwarded to ``display_progress``.
    """
    display_progress(0.9, "Adding audio back to video", progress)
    video = None
    audio = None
    try:
        video = VideoFileClip(video_path)
        audio = AudioFileClip(audio_path)
        final_video = video.set_audio(audio)
        final_video.write_videofile(output_path, codec='libx264', audio_codec='aac')
        display_progress(1, "Video's ready", progress)
    except Exception as e:
        # Keep the best-effort contract but preserve the traceback in the log.
        logging.exception("Failed to add audio %s to video %s", audio_path, video_path)
        display_progress(1, f"Failed to add audio to video: {e}", progress)
    finally:
        # Release the readers so the input files (the caller keeps the video
        # in a temporary directory) are no longer held open.
        for clip in (audio, video):
            if clip is not None:
                clip.close()

def process_video(video_path, progress=gr.Progress()):
    """Convert a video into a "spectrogram video" that keeps the original audio.

    Videos longer than 10 seconds are first trimmed to their first 10 seconds
    (written to ``trimmed_video.mp4``). Each frame is then replaced by a
    spectrogram image and the original audio track is attached to the result.

    Args:
        video_path: Path of the uploaded video.
        progress: Progress tracker forwarded to the processing steps.

    Returns:
        ``'output_video_with_audio.mp4'`` on success, otherwise a string
        describing which step failed.
    """
    # NOTE(review): the failure strings below are returned to a gr.Video
    # output by the UI wiring; confirm that is the intended way to surface
    # errors (raising gr.Error may be more appropriate).
    output_fps = 30  # fallback when the source does not report a frame rate
    clip = None
    try:
        clip = VideoFileClip(video_path)
        if clip.fps:
            # Re-encode at the source frame rate so the processed frames span
            # the same duration as the extracted audio track.
            output_fps = clip.fps
        if clip.duration > 10:
            temp_trimmed_video_path = "trimmed_video.mp4"
            clip.subclip(0, 10).write_videofile(temp_trimmed_video_path, codec='libx264')
            video_path = temp_trimmed_video_path
    except Exception as e:
        return f"Failed to load video: {e}"
    finally:
        # Release the clip's file readers on both success and failure.
        if clip is not None:
            clip.close()

    audio_path = extract_audio(video_path, progress)
    if audio_path is None:
        return "Failed to extract audio from video."
    frames = extract_frames(video_path, progress)
    # Treat an empty frame list like a failed extraction; later steps index
    # frames[0] and would otherwise raise IndexError.
    if not frames:
        return "Failed to extract frames from video."

    with tempfile.TemporaryDirectory() as temp_dir:
        processed_frames = process_video_frames(frames, temp_dir=temp_dir, progress=progress)
        temp_video_path = os.path.join(temp_dir, 'processed_video.mp4')
        save_video_from_frames(processed_frames, temp_video_path, fps=output_fps)
        output_video_path = 'output_video_with_audio.mp4'
        add_audio_to_video(temp_video_path, audio_path, output_video_path, progress)
        return output_video_path

def create_gradio_interface():
    """Build the four-tab Gradio Blocks UI and return it without launching.

    Tabs and the handlers their buttons call:
      * "Text to Spectrogram"  -> ``gradio_interface_fn``
      * "Image to Spectrogram" -> ``gradio_image_to_audio_fn``
      * "Audio to Spectrogram" -> ``gradio_decode_fn``
      * "Video to Spectrogram" -> ``process_video``

    Returns:
        The configured ``gr.Blocks`` instance.
    """
    with gr.Blocks(title="Audio Steganography", css="footer{display:none !important}", theme=gr.themes.Soft(primary_hue="green", secondary_hue="green", spacing_size="sm", radius_size="lg")) as txt2spec:
        with gr.Tab("Text to Spectrogram"):
            with gr.Group():
                text = gr.Textbox(lines=2, placeholder="Enter your text:", label="Text", info="Enter the text you want to convert to audio.")
                with gr.Row(variant="panel"):
                    # Width/height sliders are hidden; they only supply the fixed 512x256 defaults.
                    base_width = gr.Slider(value=512, label="Image Width", visible=False)
                    height = gr.Slider(value=256, label="Image Height", visible=False)
                    max_font_size = gr.Slider(minimum=10, maximum=130, step=5, value=80, label="Font size")
                    margin = gr.Slider(minimum=0, maximum=50, step=1, value=10, label="Indent")
                    letter_spacing = gr.Slider(minimum=0, maximum=50, step=1, value=5, label="Letter spacing")
                generate_button = gr.Button("Generate", variant="primary", size="lg")

            with gr.Column(variant="panel"):
                with gr.Group():
                    output_audio = gr.Audio(type="filepath", label="Generated audio")
                    output_spectrogram = gr.Image(type="filepath", label="Spectrogram")

            # Input order must match the parameter order of gradio_interface_fn.
            generate_button.click(gradio_interface_fn, inputs=[text, base_width, height, max_font_size, margin, letter_spacing], outputs=[output_audio, output_spectrogram])

        with gr.Tab("Image to Spectrogram"):
            with gr.Group():
                with gr.Column():
                    upload_image = gr.Image(type="filepath", label="Upload image")
                    convert_button = gr.Button("Convert to audio", variant="primary", size="lg")

            with gr.Column(variant="panel"):
                output_audio_from_image = gr.Audio(type="filepath", label="Generated audio")

            convert_button.click(gradio_image_to_audio_fn, inputs=[upload_image], outputs=[output_audio_from_image])

        with gr.Tab("Audio to Spectrogram"):
            with gr.Group():
                with gr.Column():
                    upload_audio = gr.Audio(type="filepath", label="Upload audio", scale=3)
                    decode_button = gr.Button("Show spectrogram", variant="primary", size="lg")

            with gr.Column(variant="panel"):
                decoded_image = gr.Image(type="filepath", label="Audio Spectrogram")

            decode_button.click(gradio_decode_fn, inputs=[upload_audio], outputs=[decoded_image])

        with gr.Tab("Video to Spectrogram"):
            with gr.Group():
                video_input = gr.Video(label="Upload video")
                # Rebinds the name used in the text tab; that tab's click
                # handler was already registered on the earlier button object.
                generate_button = gr.Button("Generate", variant="primary", size="lg")

            with gr.Column(variant="panel"):
                video_output = gr.Video(label="Video Spectrogram")

            generate_button.click(process_video, inputs=[video_input], outputs=[video_output])

    return txt2spec

# Script entry point: build the UI and start the Gradio server only when the
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    txt2spec = create_gradio_interface()
    # NOTE(review): share=True asks Gradio for a public share link — confirm
    # this is wanted in the deployment environment.
    txt2spec.launch(share=True)