fffiloni committed on
Commit
768b4f3
·
verified ·
1 Parent(s): 30b90d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -21
app.py CHANGED
@@ -5,9 +5,25 @@ import json
5
  import re
6
  from moviepy.editor import VideoFileClip
7
  from moviepy.audio.AudioClip import AudioClip
 
8
 
9
  hf_token = os.environ.get("HF_TKN")
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def extract_audio(video_in):
12
  input_video = video_in
13
  output_audio = 'audio.wav'
@@ -136,21 +152,27 @@ def get_tango(prompt):
136
  print(result)
137
  return result
138
 
139
def infer(image_in, chosen_model):
    """Caption an image and generate audio from the caption with one model.

    Args:
        image_in: Value forwarded unchanged to ``get_caption``.
        chosen_model: One of "MAGNet", "AudioLDM-2", "AudioGen" or "Tango".

    Returns:
        Whatever the selected ``get_*`` helper returns for the caption, or
        ``None`` when ``chosen_model`` matches none of the known names.
    """
    caption = get_caption(image_in)

    # Map each dropdown label to the helper that queries that model.
    generators = {
        "MAGNet": get_magnet,
        "AudioLDM-2": get_audioldm,
        "AudioGen": get_audiogen,
        "Tango": get_tango,
    }
    generate = generators.get(chosen_model)
    if generate is None:
        return None
    return generate(caption)
153
-
 
154
  css="""
155
  #col-container{
156
  margin: 0 auto;
@@ -162,25 +184,28 @@ with gr.Blocks(css=css) as demo:
162
  with gr.Column(elem_id="col-container"):
163
  gr.HTML("""
164
  <h2 style="text-align: center;">
165
- Image to SFX
166
  </h2>
167
  <p style="text-align: center;">
168
- Compare sound effects generation models from image caption.
169
  </p>
170
  """)
 
 
171
 
172
- with gr.Column():
173
- image_in = gr.Image(sources=["upload"], type="filepath", label="Image input", value="oiseau.png")
174
- with gr.Row():
175
- chosen_model = gr.Dropdown(label="Choose a model", choices=["MAGNet", "AudioLDM-2", "AudioGen", "Tango"], value="AudioLDM-2")
176
- submit_btn = gr.Button("Submit")
177
- with gr.Column():
178
- audio_o = gr.Audio(label="Audio output")
 
179
 
180
  submit_btn.click(
181
  fn=infer,
182
- inputs=[image_in, chosen_model],
183
- outputs=[audio_o],
184
  concurrency_limit = 2
185
  )
186
 
 
5
  import re
6
  from moviepy.editor import VideoFileClip
7
  from moviepy.audio.AudioClip import AudioClip
8
+ import cv2
9
 
10
  hf_token = os.environ.get("HF_TKN")
11
 
12
def extract_firstframe(video_in='yourvideo.mp4'):
    """Save the first frame of a video as ``first_frame.jpg``.

    Args:
        video_in: Path of the video file to read. The default is the
            placeholder name that the zero-argument version had hard-coded,
            so zero-argument calls keep working; ``infer`` passes the
            uploaded video path here.

    Returns:
        The string ``"first_frame.jpg"``, the path the frame was written to.

    Raises:
        RuntimeError: If no frame can be read from ``video_in`` or the image
            cannot be written.
    """
    vidcap = cv2.VideoCapture(video_in)
    try:
        # Only the first frame is needed, so one read replaces the old loop
        # (which decoded a second frame just to break out).
        success, image = vidcap.read()
    finally:
        # Release the capture handle even if decoding raises.
        vidcap.release()

    if not success:
        # Fail loudly rather than return a path to a missing or stale image.
        raise RuntimeError(f"Could not read a frame from {video_in!r}")
    if not cv2.imwrite("first_frame.jpg", image):
        raise RuntimeError("Could not write first_frame.jpg")

    print("Done extracted first frame!")
    return "first_frame.jpg"
26
+
27
  def extract_audio(video_in):
28
  input_video = video_in
29
  output_audio = 'audio.wav'
 
152
  print(result)
153
  return result
154
 
155
def blend_vsfx(video_in, audio_result):
    """Write a copy of a video whose audio track is the generated sound.

    Args:
        video_in: Path of the source video file.
        audio_result: Audio file to use as the new soundtrack
            (presumably a file path returned by a ``get_*`` helper;
            verify against those helpers).

    Returns:
        The string ``"final_video_with_sound.mp4"``, the path of the
        written video.
    """
    # Imported locally: the file's visible imports only bring in
    # VideoFileClip and AudioClip, so AudioFileClip would otherwise be an
    # undefined name here.
    from moviepy.editor import AudioFileClip

    audio_clip = AudioFileClip(audio_result)
    try:
        clip = VideoFileClip(video_in)
        try:
            final_clip = clip.set_audio(audio_clip)
            final_clip.write_videofile('final_video_with_sound.mp4')
        finally:
            # Close the video reader so its ffmpeg process is not leaked.
            clip.close()
    finally:
        audio_clip.close()

    # NOTE(review): the output name is fixed while the click handler allows
    # two concurrent runs; overlapping requests can overwrite each other.
    return "final_video_with_sound.mp4"
161
+
162
def infer(video_in, chosen_model):
    """Generate a sound effect for a video and mux it back onto the video.

    The first frame of the video is captioned with ``get_caption``, the
    caption is sent to the selected audio model, and the resulting audio is
    blended onto the video with ``blend_vsfx``.

    Args:
        video_in: Path of the input video.
        chosen_model: One of "MAGNet", "AudioLDM-2", "AudioGen" or "Tango".

    Returns:
        A tuple ``(audio_result, final_res)``: the output of the selected
        ``get_*`` helper and the path returned by ``blend_vsfx``.

    Raises:
        ValueError: If ``chosen_model`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    # Map each dropdown label to the helper that queries that model.
    generators = {
        "MAGNet": get_magnet,
        "AudioLDM-2": get_audioldm,
        "AudioGen": get_audiogen,
        "Tango": get_tango,
    }
    # Validate up front so an unknown model fails before any slow work.
    generate = generators.get(chosen_model)
    if generate is None:
        raise ValueError(f"Unknown model: {chosen_model!r}")

    image_in = extract_firstframe(video_in)
    caption = get_caption(image_in)
    audio_result = generate(caption)

    final_res = blend_vsfx(video_in, audio_result)
    return audio_result, final_res
176
  css="""
177
  #col-container{
178
  margin: 0 auto;
 
184
with gr.Column(elem_id="col-container"):
    # NOTE(review): the nesting below is reconstructed from a flattened diff
    # view; confirm against the real app.py before merging.
    gr.HTML("""
    <h2 style="text-align: center;">
        Video to SoundFX
    </h2>
    <p style="text-align: center;">
        Get sound effects from video while comparing models from image caption.
    </p>
    """)

    with gr.Row():
        with gr.Column():
            # gr.Video has no `type` argument (that belongs to gr.Image);
            # the uploaded video is passed to the handler as a file path.
            video_in = gr.Video(sources=["upload"], label="Video input")
            with gr.Row():
                chosen_model = gr.Dropdown(
                    label="Choose a model",
                    choices=["MAGNet", "AudioLDM-2", "AudioGen", "Tango"],
                    value="AudioLDM-2",
                )
                submit_btn = gr.Button("Submit")
        with gr.Column():
            audio_o = gr.Audio(label="Audio output")
            video_o = gr.Video(label="Video with soundFX")

    # infer returns (audio, video), matching the two output components.
    submit_btn.click(
        fn=infer,
        inputs=[video_in, chosen_model],
        outputs=[audio_o, video_o],
        concurrency_limit=2,
    )
211