ManishThota committed on
Commit 1727755
1 Parent(s): 3b81f0b

Delete both_app.py

Files changed (1):
  both_app.py +0 -186
both_app.py DELETED
@@ -1,186 +0,0 @@
import warnings
warnings.filterwarnings("ignore")
import gradio as gr
import pandas as pd
import torch
import gc
import os
from typing import Dict, List
from src.video_model import describe_video
from src.utils import parse_string, parse_annotations

# --- Function to process a single video and return the annotated response ---
def process_video_and_questions(video, standing, hands, location, screen):
    video_name = os.path.basename(video)
    query = "Answer the questions from the video\n"
    additional_info = []
    if standing:
        additional_info.append("Is the subject in the video standing or sitting?\n")
    if hands:
        additional_info.append("Is the subject holding any object in their hands?\n")
    if location:
        additional_info.append("Is the subject present indoors?\n")
    if screen:
        additional_info.append("Is the subject interacting with a screen in the background by facing the screen?\n")

    end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the examples below:
<annotation>indoors: 0</annotation>
<annotation>standing: 1</annotation>
<annotation>hands.free: 0</annotation>
<annotation>screen.interaction_yes: 0</annotation>
"""

    final_query = query + " " + " ".join(additional_info)
    final_prompt = final_query + " " + end_query

    response = describe_video(video, final_prompt)
    final_response = f"<video_name>{video_name}</video_name>" + " \n" + response

    # For any question that was not asked, mask a positive answer with None.
    conditions = {
        'standing': (standing, 'standing: 1', 'standing: None'),
        'hands': (hands, 'hands.free: 1', 'hands.free: None'),
        'location': (location, 'indoors: 1', 'indoors: None'),
        'screen': (screen, 'screen.interaction_yes: 1', 'screen.interaction_yes: None')
    }

    for condition, to_replace, replacement in conditions.values():
        if not condition:
            final_response = final_response.replace(to_replace, replacement)

    return final_response

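# Example of the masking above (illustrative values): with only the
# "Standing" box checked, a model response containing
#     <annotation>standing: 1</annotation>
#     <annotation>indoors: 1</annotation>
# is rewritten to
#     <annotation>standing: 1</annotation>
#     <annotation>indoors: None</annotation>
# Only positive answers ("X: 1") for unasked questions are masked; an
# "X: 0" for an unasked question passes through unchanged.
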
# Function to save data to a CSV file using pandas
def save_to_csv(observations: List[Dict], output_dir: str = "outputs") -> str:
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Convert the list of dictionaries to a pandas DataFrame
    df = pd.DataFrame(observations)

    # Specify the CSV file path
    csv_file = os.path.join(output_dir, "video_observations.csv")

    # Save the DataFrame to a CSV file
    df.to_csv(csv_file, index=False)

    return csv_file

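# Illustrative call (field names follow the annotation keys used above):
#     save_to_csv([{'video_name': 'clip1.mp4', 'standing': 1, 'hands.free': 0}])
# writes outputs/video_observations.csv with one row per observation dict.
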
# Function to process a single video and return the observation data
def process_single_video(video_path, standing, hands, location, screen) -> Dict:
    video_name = os.path.basename(video_path)  # Extract video name from the path
    query = "Describe this video in detail and answer the questions"
    additional_info = []
    if standing:
        additional_info.append("Is the subject in the video standing or sitting?\n")
    if hands:
        additional_info.append("Is the subject holding any object in their hands?\n")
    if location:
        additional_info.append("Is the subject present indoors?\n")
    if screen:
        additional_info.append("Is the subject interacting with a screen in the background by facing the screen?\n")

    end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the examples below:
<annotation>indoors: 0</annotation>
<annotation>standing: 1</annotation>
<annotation>hands.free: 0</annotation>
<annotation>screen.interaction_yes: 0</annotation>
"""

    final_query = query + " " + " ".join(additional_info)
    final_prompt = final_query + " " + end_query

    # describe_video handles the actual video + prompt inference
    response = describe_video(video_path, final_prompt)
    final_response = f"<video_name>{video_name}</video_name>" + " \n" + response

    # For any question that was not asked, mask a positive answer with None.
    conditions = {
        'standing': (standing, 'standing: 1', 'standing: None'),
        'hands': (hands, 'hands.free: 1', 'hands.free: None'),
        'location': (location, 'indoors: 1', 'indoors: None'),
        'screen': (screen, 'screen.interaction_yes: 1', 'screen.interaction_yes: None')
    }

    for condition, to_replace, replacement in conditions.values():
        if not condition:
            final_response = final_response.replace(to_replace, replacement)

    # Parse the response to extract the video name and annotations
    parsed_content = parse_string(final_response, ["video_name", "annotation"])
    video_name = parsed_content['video_name'][0] if parsed_content['video_name'] else None
    annotations_dict = parse_annotations(parsed_content['annotation']) if parsed_content['annotation'] else {}

    # Return the observation as a dictionary
    return {'video_name': video_name, **annotations_dict}

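# Sketch of the parsing contract assumed above. parse_string and
# parse_annotations live in src.utils and are not shown here; based on how
# they are used, something like the following is expected:
#     parse_string(text, ["video_name", "annotation"])
#         -> {'video_name': ['clip1.mp4'],
#             'annotation': ['standing: 1', 'indoors: None', ...]}
#     parse_annotations(['standing: 1', 'indoors: None'])
#         -> {'standing': 1, 'indoors': 'None'}
# The exact value types depend on the helpers' implementation.
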
# Function to process a list of videos
def process_multiple_videos(video_files: List[str], standing, hands, location, screen):
    all_observations = []

    for video_path in video_files:
        observation = process_single_video(video_path, standing, hands, location, screen)
        if observation['video_name']:  # Only add valid observations
            all_observations.append(observation)
        else:
            print("Error processing video:", video_path)  # Log any errors

    # Clear GPU cache and collect garbage between runs
    torch.cuda.empty_cache()
    gc.collect()

    # Save all observations to a CSV file and return the file path
    csv_file = save_to_csv(all_observations)
    return "Processing completed. Download the CSV file.", csv_file

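# Standalone usage sketch (paths are placeholders):
#     status, csv_path = process_multiple_videos(
#         ["videos/a.mp4", "videos/b.mp4"],
#         standing=True, hands=True, location=False, screen=False)
#     status   -> "Processing completed. Download the CSV file."
#     csv_path -> "outputs/video_observations.csv"
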
# Gradio interface helpers
def gradio_interface_single(video, standing, hands, location, screen):
    return process_video_and_questions(video, standing, hands, location, screen)

def gradio_interface_multiple(video_files, standing, hands, location, screen):
    video_file_paths = [video.name for video in video_files]  # Extract file paths from uploaded files
    return process_multiple_videos(video_file_paths, standing, hands, location, screen)

def gradio_interface_save_csv(response_text):
    # save_to_csv expects a list of observation dicts, not the raw textbox
    # string, so parse the annotated response first (this mirrors the parsing
    # in process_single_video).
    parsed_content = parse_string(response_text, ["video_name", "annotation"])
    video_name = parsed_content['video_name'][0] if parsed_content['video_name'] else None
    annotations_dict = parse_annotations(parsed_content['annotation']) if parsed_content['annotation'] else {}
    return save_to_csv([{'video_name': video_name, **annotations_dict}])

with gr.Blocks() as demo:
    with gr.Tab("Single Video Processing"):
        with gr.Row():
            with gr.Column():
                video = gr.Video(label="Video")
                standing = gr.Checkbox(label="Standing")
                hands = gr.Checkbox(label="Hands Free")
                location = gr.Checkbox(label="Indoors")
                screen = gr.Checkbox(label="Screen Interaction")
                submit_btn = gr.Button("Generate Annotations")
                generate_csv_btn = gr.Button("Generate CSV")

            with gr.Column():
                response = gr.Textbox(label="Video Description", show_label=True, show_copy_button=True)
                csv_output = gr.File(label="Download CSV", interactive=False)

        submit_btn.click(
            fn=gradio_interface_single,
            inputs=[video, standing, hands, location, screen],
            outputs=response
        )

        # Parse the response before saving; save_to_csv itself cannot take
        # the raw textbox string.
        generate_csv_btn.click(
            fn=gradio_interface_save_csv,
            inputs=response,
            outputs=csv_output
        )

    with gr.Tab("Batch Video Processing"):
        with gr.Row():
            video_files = gr.File(file_count="multiple", file_types=["video"], label="Upload multiple videos")
            standing = gr.Checkbox(label="Standing")
            hands = gr.Checkbox(label="Hands Free")
            location = gr.Checkbox(label="Indoors")
            screen = gr.Checkbox(label="Screen Interaction")
            submit_btn = gr.Button("Process Videos")
            batch_status = gr.Textbox(label="Status", interactive=False)
            download_link = gr.File(label="Download CSV")

        submit_btn.click(
            fn=gradio_interface_multiple,
            inputs=[video_files, standing, hands, location, screen],
            # process_multiple_videos returns (status message, csv path);
            # route the status to this tab rather than to the single-video
            # textbox in the other tab.
            outputs=[batch_status, download_link]
        )

demo.launch(debug=False)