TH9817 committed on
Commit
7741a53
·
verified ·
1 Parent(s): 7c8d6c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -12
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  import numpy as np
4
  from huggingface_hub import hf_hub_download
5
  from transformers import BitsAndBytesConfig, LlavaNextVideoForConditionalGeneration, LlavaNextVideoProcessor
 
6
 
7
 
8
  quantization_config = BitsAndBytesConfig(
@@ -40,11 +41,10 @@ def read_video_pyav(container, indices):
40
  frames.append(frame)
41
  return np.stack([x.to_ndarray(format="rgb24") for x in frames])
42
 
43
- from huggingface_hub import hf_hub_download
44
 
45
  # Download video from the hub
46
  video_path_1 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset")
47
- video_path_2 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="karate.mp4", repo_type="dataset")
48
 
49
  container = av.open(video_path_1)
50
 
@@ -54,12 +54,12 @@ indices = np.arange(0, total_frames, total_frames / 8).astype(int)
54
  clip_baby = read_video_pyav(container, indices)
55
 
56
 
57
- container = av.open(video_path_2)
58
 
59
  # sample uniformly 8 frames from the video (we can sample more for longer videos)
60
- total_frames = container.streams.video[0].frames
61
- indices = np.arange(0, total_frames, total_frames / 8).astype(int)
62
- clip_karate = read_video_pyav(container, indices)
63
 
64
  # Each "content" is a list of dicts and you can add image/video/text modalities
65
  conversation = [
@@ -83,13 +83,23 @@ conversation_2 = [
83
  ]
84
 
85
  prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
86
- prompt_2 = processor.apply_chat_template(conversation_2, add_generation_prompt=True)
87
 
88
- inputs = processor([prompt, prompt_2], videos=[clip_baby, clip_karate], padding=True, return_tensors="pt").to(model.device)
89
 
90
- generate_kwargs = {"max_new_tokens": 100, "do_sample": True, "top_p": 0.9}
 
91
 
92
- output = model.generate(**inputs, **generate_kwargs)
93
- generated_text = processor.batch_decode(output, skip_special_tokens=True)
 
 
 
 
 
 
 
 
94
 
95
- print(generated_text)
 
 
3
  import numpy as np
4
  from huggingface_hub import hf_hub_download
5
  from transformers import BitsAndBytesConfig, LlavaNextVideoForConditionalGeneration, LlavaNextVideoProcessor
6
+ import gradio as gr
7
 
8
 
9
  quantization_config = BitsAndBytesConfig(
 
41
  frames.append(frame)
42
  return np.stack([x.to_ndarray(format="rgb24") for x in frames])
43
 
 
44
 
45
  # Download video from the hub
46
  video_path_1 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset")
47
+ #video_path_2 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="karate.mp4", repo_type="dataset")
48
 
49
  container = av.open(video_path_1)
50
 
 
54
  clip_baby = read_video_pyav(container, indices)
55
 
56
 
57
+ #container = av.open(video_path_2)
58
 
59
  # sample uniformly 8 frames from the video (we can sample more for longer videos)
60
+ #total_frames = container.streams.video[0].frames
61
+ #indices = np.arange(0, total_frames, total_frames / 8).astype(int)
62
+ #clip_karate = read_video_pyav(container, indices)
63
 
64
  # Each "content" is a list of dicts and you can add image/video/text modalities
65
  conversation = [
 
83
  ]
84
 
85
  prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
86
+ #prompt_2 = processor.apply_chat_template(conversation_2, add_generation_prompt=True)
87
 
88
+ inputs = processor(prompt, videos=clip_baby, padding=True, return_tensors="pt").to(model.device)
89
 
90
def chat(i):
    """Generate a description of the preloaded video clip for the Gradio UI.

    Args:
        i: Maximum number of new tokens to generate. It comes from the
            Gradio slider, so it may arrive as a float and is cast to int.

    Returns:
        The string "answer" followed by the decoded model output.

    Relies on the module-level ``inputs`` built above, and on ``model`` and
    ``processor``, which are presumably created earlier in the file (not
    visible in this excerpt).
    """
    # generate() expects an integer token budget; the slider value may not be one.
    generate_kwargs = {"max_new_tokens": int(i), "do_sample": True, "top_p": 0.9}

    output = model.generate(**inputs, **generate_kwargs)
    generated_text = processor.batch_decode(output, skip_special_tokens=True)

    # batch_decode returns one string per sequence in the batch (a list), so
    # concatenating it directly onto a str raises TypeError. Join it first.
    return "answer" + "\n".join(generated_text)
97
+
98
+ demo = gr.Interface(
99
+ fn=chat,
100
+ inputs=[gr.Slider(100,300)],
101
+ outputs=["text"],
102
+ )
103
 
104
+ # Launch the Gradio app
105
+ demo.launch()