VinitT committed
Commit 7575d3e · verified · 1 Parent(s): b63e96a

Update app.py

Files changed (1)
  1. app.py +104 -18
app.py CHANGED
@@ -1,3 +1,87 @@
+# import streamlit as st
+# from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
+# from PIL import Image
+# import torch
+# import cv2
+# import tempfile
+
+# def load_model_and_processor():
+#     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
+#     model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     model.to(device)
+#     return processor, model, device
+
+# def process_image(uploaded_file):
+#     image = Image.open(uploaded_file)
+#     image = image.resize((512, 512))
+#     return image
+
+# def process_video(uploaded_file):
+#     tfile = tempfile.NamedTemporaryFile(delete=False)
+#     tfile.write(uploaded_file.read())
+#     cap = cv2.VideoCapture(tfile.name)
+#     ret, frame = cap.read()
+#     cap.release()
+#     if not ret:
+#         return None
+#     image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+#     image = image.resize((512, 512))
+#     return image
+
+# def generate_description(processor, model, device, image, user_question):
+#     messages = [
+#         {
+#             "role": "user",
+#             "content": [
+#                 {
+#                     "type": "image",
+#                     "image": image,
+#                 },
+#                 {"type": "text", "text": user_question},
+#             ],
+#         }
+#     ]
+#     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+#     inputs = processor(text=[text], images=[image], padding=True, return_tensors="pt")
+#     inputs = inputs.to(device)
+#     generated_ids = model.generate(**inputs, max_new_tokens=512)
+#     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+#     output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+#     return output_text[0]
+
+# def main():
+#     st.title("Media Description Generator")
+#     processor, model, device = load_model_and_processor()
+#     uploaded_files = st.file_uploader("Choose images or videos...", type=["jpg", "jpeg", "png", "mp4", "avi", "mov"], accept_multiple_files=True)
+
+#     if uploaded_files:
+#         user_question = st.text_input("Ask a question about the images or videos:")
+#         if user_question:
+#             for uploaded_file in uploaded_files:
+#                 file_type = uploaded_file.type.split('/')[0]
+#                 if file_type == 'image':
+#                     image = process_image(uploaded_file)
+#                     st.image(image, caption='Uploaded Image.', use_column_width=True)
+#                     st.write("Generating description...")
+#                 elif file_type == 'video':
+#                     image = process_video(uploaded_file)
+#                     if image is None:
+#                         st.error("Failed to read the video file.")
+#                         continue
+#                     st.image(image, caption='First Frame of Uploaded Video.', use_column_width=True)
+#                     st.write("Generating description...")
+#                 else:
+#                     st.error("Unsupported file type.")
+#                     continue
+#                 description = generate_description(processor, model, device, image, user_question)
+#                 st.write("Description:")
+#                 st.write(description)
+
+# if __name__ == "__main__":
+#     main()
+
+
 import streamlit as st
 from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
 from PIL import Image
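
One thing worth flagging in both the commented-out version and the live one: load_model_and_processor() runs at the top of main(), so the 2B checkpoint is reloaded on every Streamlit rerun, i.e. on every widget interaction. A minimal sketch of one way to avoid that, assuming Streamlit 1.18+ where st.cache_resource is available; the decorator is the only line added to the committed function:

    import streamlit as st
    import torch
    from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

    @st.cache_resource  # cache across reruns: the model loads once per process
    def load_model_and_processor():
        processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
        model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        return processor, model, device
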
@@ -58,25 +142,27 @@ def main():
     if uploaded_files:
         user_question = st.text_input("Ask a question about the images or videos:")
         if user_question:
-            for uploaded_file in uploaded_files:
-                file_type = uploaded_file.type.split('/')[0]
-                if file_type == 'image':
-                    image = process_image(uploaded_file)
-                    st.image(image, caption='Uploaded Image.', use_column_width=True)
-                    st.write("Generating description...")
-                elif file_type == 'video':
-                    image = process_video(uploaded_file)
-                    if image is None:
-                        st.error("Failed to read the video file.")
+            generate_button = st.button("Generate Descriptions")
+            if generate_button:
+                for uploaded_file in uploaded_files:
+                    file_type = uploaded_file.type.split('/')[0]
+                    if file_type == 'image':
+                        image = process_image(uploaded_file)
+                        st.image(image, caption='Uploaded Image.', use_column_width=True)
+                        st.write("Generating description...")
+                    elif file_type == 'video':
+                        image = process_video(uploaded_file)
+                        if image is None:
+                            st.error("Failed to read the video file.")
+                            continue
+                        st.image(image, caption='First Frame of Uploaded Video.', use_column_width=True)
+                        st.write("Generating description...")
+                    else:
+                        st.error("Unsupported file type.")
                         continue
-                    st.image(image, caption='First Frame of Uploaded Video.', use_column_width=True)
-                    st.write("Generating description...")
-                else:
-                    st.error("Unsupported file type.")
-                    continue
-                description = generate_description(processor, model, device, image, user_question)
-                st.write("Description:")
-                st.write(description)
+                    description = generate_description(processor, model, device, image, user_question)
+                    st.write("Description:")
+                    st.write(description)

 if __name__ == "__main__":
     main()
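
The substantive change in this hunk is the st.button gate: the generation loop now runs only on the rerun triggered by clicking Generate Descriptions, rather than on every keystroke in the text input. A self-contained sketch of the rerun behavior this relies on (the st.write call is a toy stand-in for the real generation loop):

    import streamlit as st

    question = st.text_input("Ask a question:")  # each edit reruns the script
    if question:
        # st.button returns True only on the rerun caused by the click itself,
        # so the work inside this branch does not re-execute per keystroke.
        if st.button("Generate Descriptions"):
            st.write(f"Would generate a description for: {question!r}")

Note that st.button does not persist across reruns; if the generated result should survive later interactions, it would need to be stashed in st.session_state.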
 
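A smaller issue carried over unchanged from the old version: process_video writes each upload to a NamedTemporaryFile(delete=False) and never unlinks it, so temporary files accumulate for the life of the process. A sketch of the same first-frame extraction with cleanup, logic otherwise unchanged (the explicit close() before cv2.VideoCapture also matters on Windows, where an open temp file cannot be reopened by path):

    import os
    import tempfile

    import cv2
    from PIL import Image

    def process_video(uploaded_file):
        # Persist the upload to disk so OpenCV can open it by path.
        tfile = tempfile.NamedTemporaryFile(delete=False)
        try:
            tfile.write(uploaded_file.read())
            tfile.close()
            cap = cv2.VideoCapture(tfile.name)
            ret, frame = cap.read()
            cap.release()
        finally:
            os.unlink(tfile.name)  # remove the temp file even if reading fails
        if not ret:
            return None
        # OpenCV decodes to BGR; PIL expects RGB.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        return image.resize((512, 512))
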
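For readers skimming the diff, the slicing inside generate_description is the standard decode-only-the-completion idiom: model.generate for a causal LM returns the prompt tokens followed by the newly generated ones, so each output row is sliced past its prompt length before batch_decode. A toy illustration with no model involved:

    import torch

    input_ids = torch.tensor([[101, 102, 103]])            # the prompt
    generated_ids = torch.tensor([[101, 102, 103, 7, 8]])  # generate() echoes the prompt first

    trimmed = [out[len(inp):] for inp, out in zip(input_ids, generated_ids)]
    print(trimmed)  # [tensor([7, 8])] -- only newly generated tokens get decoded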