fffiloni committed on
Commit
027e8a9
·
verified ·
1 Parent(s): 05fccbb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -0
app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_client import Client
3
+
4
+ # 1. extract and store 1 frame out of every 5 frames from the video input
5
+ # 2. extract audio
6
+ # 3. for each image in extracted_images, get a caption from the caption model and append it to a list
7
+ # 4. for audio, ask audio questioning model to describe sound/scene
8
+ # 5. give everything to the LLM and ask it to summarize the video, based on the image caption list combined with the audio caption
9
+
10
def extract_image():
    """Placeholder for the frame-extraction step of the video pipeline.

    Presumably this is where frames will be sampled from the input video
    (the planning notes in this file mention keeping one image out of every
    five) -- TODO confirm once the step is written.

    The original declaration had neither a trailing colon nor a body, which
    made the whole module a syntax error.  It is now a valid stub that fails
    loudly instead of silently returning ``None``.

    Raises:
        NotImplementedError: Always, until the step is implemented.
    """
    raise NotImplementedError("extract_image is not implemented yet")
11
+
12
def get_moondream():
    """Placeholder for the image-captioning step of the video pipeline.

    Presumably this will query a "moondream" captioning model for each
    extracted image -- TODO confirm; nothing in this file calls a model yet.

    The original declaration had neither a trailing colon nor a body, which
    made the whole module a syntax error.  It is now a valid stub that fails
    loudly instead of silently returning ``None``.

    Raises:
        NotImplementedError: Always, until the step is implemented.
    """
    raise NotImplementedError("get_moondream is not implemented yet")
13
+
14
def get_salmonn():
    """Placeholder for the audio-description step of the video pipeline.

    Presumably this will ask a "salmonn" audio question-answering model to
    describe the sound/scene of the extracted audio track -- TODO confirm;
    nothing in this file calls a model yet.

    The original declaration had neither a trailing colon nor a body, which
    made the whole module a syntax error.  It is now a valid stub that fails
    loudly instead of silently returning ``None``.

    Raises:
        NotImplementedError: Always, until the step is implemented.
    """
    raise NotImplementedError("get_salmonn is not implemented yet")
15
+
16
def llm_process():
    """Placeholder for the final LLM summarization step of the pipeline.

    Presumably this will hand the image captions and the audio description
    to an LLM and ask for a combined summary -- TODO confirm; nothing in
    this file calls a model yet.

    The original declaration had neither a trailing colon nor a body, which
    made the whole module a syntax error.  It is now a valid stub that fails
    loudly instead of silently returning ``None``.

    Raises:
        NotImplementedError: Always, until the step is implemented.
    """
    raise NotImplementedError("llm_process is not implemented yet")
17
+
18
def infer(video_in):
    """Gradio callback: produce the text shown in the description box.

    Args:
        video_in: Value Gradio passes from the ``gr.Video`` input component
            (presumably a file path, or ``None`` when nothing was uploaded
            -- TODO confirm against the installed Gradio version).

    Returns:
        str: The video description.  The processing pipeline is not wired up
        yet, so this is currently an empty string.
    """
    # The original returned the name ``video_description`` without ever
    # assigning it locally, so at call time it resolved to the module-level
    # ``gr.Textbox`` component instead of a text value.  Bind a local string
    # so the callback always returns something the Textbox can display.
    # TODO: run frame extraction, captioning, audio description and the LLM
    # summary here once those steps are implemented.
    video_description = ""
    return video_description
21
+
22
# UI definition: a single column holding the title, the video input, a submit
# button and the text box that receives the generated description.
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML("""
        <h2 style="text-align: center;">Video description</h2>
        """)
        video_in = gr.Video(label="Video input")
        # Label typo fixed ("SUbmit" -> "Submit"); this is user-facing text.
        submit_btn = gr.Button("Submit")
        video_description = gr.Textbox(label="Video description")

    # Clicking the button sends the uploaded video to `infer` and writes the
    # returned value into the description text box.
    submit_btn.click(
        fn=infer,
        inputs=[video_in],
        outputs=[video_description],
    )

# Enable request queuing, then start the web server.
demo.queue().launch()