fffiloni committed on
Commit
933471e
·
verified ·
1 Parent(s): 5090a91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -17,7 +17,10 @@ zephyr_model = "HuggingFaceH4/zephyr-7b-beta"
17
  pipe = pipeline("text-generation", model=zephyr_model, torch_dtype=torch.bfloat16, device_map="auto")
18
 
19
  standard_sys = f"""
20
-
 
 
 
21
  """
22
 
23
  def extract_frames(video_in, interval=24, output_format='.jpg'):
@@ -83,7 +86,7 @@ def extract_audio(video_path):
83
  return "output_audio.mp3"
84
 
85
  def get_salmonn(audio_in):
86
- salmonn_prompt = "Describe sound."
87
  client = Client("fffiloni/SALMONN-7B-gradio")
88
  result = client.predict(
89
  audio_in, # filepath in 'Audio' Audio component
@@ -141,9 +144,9 @@ def infer(video_in):
141
  print(formatted_captions)
142
 
143
  # Send formatted captions to LLM
144
- #video_description_from_llm = llm_process(formatted_captions)
145
 
146
- return formatted_captions
147
 
148
  with gr.Blocks() as demo :
149
  with gr.Column(elem_id="col-container"):
 
17
  pipe = pipeline("text-generation", model=zephyr_model, torch_dtype=torch.bfloat16, device_map="auto")
18
 
19
  standard_sys = f"""
20
+ You will be provided a list of visual events, and an audio description. All these informations come from a single video.
21
+ List of visual events are actually images extracted from this video every 12 frames.
22
+ Audio events are actually the description from the audio of the video.
23
+ Your job is to use these information to provide a short resume about what is happening in the video.
24
  """
25
 
26
  def extract_frames(video_in, interval=24, output_format='.jpg'):
 
86
  return "output_audio.mp3"
87
 
88
  def get_salmonn(audio_in):
89
+ salmonn_prompt = "Please describe the audio"
90
  client = Client("fffiloni/SALMONN-7B-gradio")
91
  result = client.predict(
92
  audio_in, # filepath in 'Audio' Audio component
 
144
  print(formatted_captions)
145
 
146
  # Send formatted captions to LLM
147
+ video_description_from_llm = llm_process(formatted_captions)
148
 
149
+ return video_description_from_llm
150
 
151
  with gr.Blocks() as demo :
152
  with gr.Column(elem_id="col-container"):