fffiloni committed on
Commit
4b738f1
1 Parent(s): aac1eb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -7
app.py CHANGED
@@ -14,10 +14,6 @@ pipe.to("cuda")
14
  # if using torch < 2.0
15
  # pipe.enable_xformers_memory_efficient_attention()
16
 
17
-
18
-
19
-
20
-
21
  from pydub import AudioSegment
22
 
23
  def cut_audio(input_path, output_path, max_duration=30000):
@@ -77,16 +73,40 @@ def infer(audio_file):
77
 
78
  images = pipe(prompt=result).images[0]
79
 
80
- return cap_result, result, images
 
81
 
82
- with gr.Blocks() as demo:
 
 
 
83
  with gr.Column(elem_id="col-container"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  audio_input = gr.Audio(type="filepath", source="upload")
85
  infer_btn = gr.Button("Generate")
86
  lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
87
  llama_trans_cap = gr.Textbox(label="Llama translation")
88
  img_result = gr.Image(label="Result")
89
 
90
- infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
 
91
 
92
  demo.queue().launch()
 
14
  # if using torch < 2.0
15
  # pipe.enable_xformers_memory_efficient_attention()
16
 
 
 
 
 
17
  from pydub import AudioSegment
18
 
19
  def cut_audio(input_path, output_path, max_duration=30000):
 
73
 
74
  images = pipe(prompt=result).images[0]
75
 
76
+ #return cap_result, result, images
77
+ return images
78
 
79
+ css = """
80
+ #col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
81
+ """
82
+ with gr.Blocks(css=css) as demo:
83
  with gr.Column(elem_id="col-container"):
84
+ gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
85
+ <div
86
+ style="
87
+ display: inline-flex;
88
+ align-items: center;
89
+ gap: 0.8rem;
90
+ font-size: 1.75rem;
91
+ "
92
+ >
93
+ <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
94
+ Music To Image
95
+ </h1>
96
+ </div>
97
+ <p style="margin-bottom: 10px; font-size: 94%">
98
+ Sends an audio in to <a href="https://huggingface.co/spaces/seungheondoh/LP-Music-Caps-demo" target="_blank">LP-Music-Caps</a>
99
+ to generate a audio cpation which is then translated to an illustrative image description with Llama2, then run through
100
+ Stable Diffusion XL to generate an image from the audio !
101
+ </p>
102
+ </div>""")
103
  audio_input = gr.Audio(type="filepath", source="upload")
104
  infer_btn = gr.Button("Generate")
105
  lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
106
  llama_trans_cap = gr.Textbox(label="Llama translation")
107
  img_result = gr.Image(label="Result")
108
 
109
+ #infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
110
+ infer_btn.click(fn=infer, inputs=[audio_input], outputs=[img_result])
111
 
112
  demo.queue().launch()