ManishThota committed on
Commit
f9f1c5f
·
verified ·
1 Parent(s): d22f2b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -61
app.py CHANGED
@@ -7,85 +7,85 @@ from src.utils import parse_string, parse_annotations
7
  import os
8
 
9
 
10
- # # --- Function to construct the final query ---
11
- # def process_video_and_questions(video, sitting, hands, location, screen):
12
- # # Extract the video name (filename)
13
- # video_name = os.path.basename(video)
14
-
15
- # # Construct the query with the video name included
16
- # query = f"Describe the video in detail and answer the questions"
17
- # additional_info = []
18
- # if sitting:
19
- # additional_info.append("sitting/standing : Is the subject in the video standing or sitting?")
20
- # if hands:
21
- # additional_info.append("hands_free: Is the subject holding any object in their hands, if so the hands are not free else they are free?")
22
- # if location:
23
- # additional_info.append("indoors/outdoors: Is the subject present indoors or outdoors?")
24
- # if screen:
25
- # additional_info.append("screen_interactions: Is the subject interacting with a screen in the background by facing the screen?")
26
-
27
- # end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Below is an example:
28
- # <annotation>indoors: 0</annotation>
29
- # <annotation>standing: 1</annotation>
30
- # <annotation>hands.free: None</annotation>
31
- # <annotation>screen.interaction_yes: None</annotation>
32
- # """
33
-
34
- # final_query = query + " " + " ".join(additional_info)
35
- # final_prompt = final_query + " " + end_query
36
-
37
- # # Assuming your describe_video function handles the video processing
38
- # response = describe_video(video, final_prompt)
39
- # final_response = f"<video_name>{video_name}</video_name>" + " " + response
40
- # return final_response
41
-
42
-
43
  def process_video_and_questions(video, sitting, hands, location, screen):
44
  # Extract the video name (filename)
45
  video_name = os.path.basename(video)
46
 
47
  # Construct the query with the video name included
48
- query = f"Describe the video in detail and answer the questions"
49
  additional_info = []
50
-
51
- # Handle each checkbox option, including those not selected (None)
52
- if sitting is not None:
53
  additional_info.append("Is the subject in the video standing or sitting?")
54
- else:
55
- additional_info.append("<annotation>standing: None</annotation>")
56
-
57
- if hands is not None:
58
  additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
59
- else:
60
- additional_info.append("<annotation>hands.free: None</annotation>")
61
-
62
- if location is not None:
63
  additional_info.append("Is the subject present indoors or outdoors?")
64
- else:
65
- additional_info.append("<annotation>indoors: None</annotation>")
66
-
67
- if screen is not None:
68
  additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
69
- else:
70
- additional_info.append("<annotation>screen.interaction_yes: None</annotation>")
71
 
72
- # Updated end_query with structured prompt
73
- end_query = """
74
- You're an AI assistant, and your goal is to provide the results of the video analysis in the correct format as described below:
 
 
 
75
 
76
- <annotations>
77
- - Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present.
78
- - Use <annotation> tags for each attribute like indoors, standing, hands.free, and screen.interaction_yes.
79
- </annotations>
80
- """
81
-
82
  final_query = query + " " + " ".join(additional_info)
83
  final_prompt = final_query + " " + end_query
84
 
85
  # Assuming your describe_video function handles the video processing
86
  response = describe_video(video, final_prompt)
87
  final_response = f"<video_name>{video_name}</video_name>" + " " + response
88
- return final_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
 
91
  def output_to_csv(final_response):
 
7
  import os
8
 
9
 
10
+ # --- Function to construct the final query ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def process_video_and_questions(video, sitting, hands, location, screen):
12
  # Extract the video name (filename)
13
  video_name = os.path.basename(video)
14
 
15
  # Construct the query with the video name included
16
+ query = f"Answer the questions from the video\n"
17
  additional_info = []
18
+ if sitting:
 
 
19
  additional_info.append("Is the subject in the video standing or sitting?")
20
+ if hands:
 
 
 
21
  additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
22
+ if location:
 
 
 
23
  additional_info.append("Is the subject present indoors or outdoors?")
24
+ if screen:
 
 
 
25
  additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
 
 
26
 
27
+ end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the below examples\n:
28
+ <annotation>indoors: 0</annotation>
29
+ <annotation>standing: 1</annotation>
30
+ <annotation>hands.free: 0</annotation>
31
+ <annotation>screen.interaction_yes: 0</annotation>
32
+ """
33
 
 
 
 
 
 
 
34
  final_query = query + " " + " ".join(additional_info)
35
  final_prompt = final_query + " " + end_query
36
 
37
  # Assuming your describe_video function handles the video processing
38
  response = describe_video(video, final_prompt)
39
  final_response = f"<video_name>{video_name}</video_name>" + " " + response
40
+ return final_response
41
+
42
+
43
+ # def process_video_and_questions(video, sitting, hands, location, screen):
44
+ # # Extract the video name (filename)
45
+ # video_name = os.path.basename(video)
46
+
47
+ # # Construct the query with the video name included
48
+ # query = f"Describe the video in detail and answer the questions"
49
+ # additional_info = []
50
+
51
+ # # Handle each checkbox option, including those not selected (None)
52
+ # if sitting is not None:
53
+ # additional_info.append("Is the subject in the video standing or sitting?")
54
+ # else:
55
+ # additional_info.append("<annotation>standing: None</annotation>")
56
+
57
+ # if hands is not None:
58
+ # additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
59
+ # else:
60
+ # additional_info.append("<annotation>hands.free: None</annotation>")
61
+
62
+ # if location is not None:
63
+ # additional_info.append("Is the subject present indoors or outdoors?")
64
+ # else:
65
+ # additional_info.append("<annotation>indoors: None</annotation>")
66
+
67
+ # if screen is not None:
68
+ # additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
69
+ # else:
70
+ # additional_info.append("<annotation>screen.interaction_yes: None</annotation>")
71
+
72
+ # # Updated end_query with structured prompt
73
+ # end_query = """
74
+ # You're an AI assistant, and your goal is to provide the results of the video analysis in the correct format as described below:
75
+
76
+ # <annotations>
77
+ # - Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present.
78
+ # - Use <annotation> tags for each attribute like indoors, standing, hands.free, and screen.interaction_yes.
79
+ # </annotations>
80
+ # """
81
+
82
+ # final_query = query + " " + " ".join(additional_info)
83
+ # final_prompt = final_query + " " + end_query
84
+
85
+ # # Assuming your describe_video function handles the video processing
86
+ # response = describe_video(video, final_prompt)
87
+ # final_response = f"<video_name>{video_name}</video_name>" + " " + response
88
+ # return final_response
89
 
90
 
91
  def output_to_csv(final_response):