ManishThota committed on
Commit
89b6fe6
·
verified ·
1 Parent(s): 88cedc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -26
app.py CHANGED
@@ -6,6 +6,7 @@ from src.video_model import describe_video
6
  from src.utils import parse_string, parse_annotations
7
  import os
8
 
 
9
  # # --- Function to construct the final query ---
10
  # def process_video_and_questions(video, sitting, hands, location, screen):
11
  # # Extract the video name (filename)
@@ -15,32 +16,29 @@ import os
15
  # query = f"Describe the video in detail and answer the questions"
16
  # additional_info = []
17
  # if sitting:
18
- # additional_info.append("Is the subject in the video standing or sitting?")
19
  # if hands:
20
- # additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
21
  # if location:
22
- # additional_info.append("Is the subject present indoors or outdoors?")
23
  # if screen:
24
- # additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
25
 
26
  # end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Below is an example:
27
- # <instructions>
28
- # <annotation>indoors: 0</annotation>
29
- # <annotation>standing: 1</annotation>
30
- # <annotation>hands.free: None</annotation>
31
- # <annotation>screen.interaction_yes: None</annotation>
32
- # </instructions>
33
-
34
  # """
35
 
36
-
37
- # final_query = query + " " + " ".join(additional_info)
38
- # final_prompt = final_query + " " + end_query
39
 
40
- # # Assuming your describe_video function handles the video processing
41
- # response = describe_video(video, final_prompt)
42
- # final_response = f"<video_name>{video_name}</video_name>" + " " + response
43
- # return final_response
 
44
 
45
  def process_video_and_questions(video, sitting, hands, location, screen):
46
  # Extract the video name (filename)
@@ -71,13 +69,22 @@ def process_video_and_questions(video, sitting, hands, location, screen):
71
  else:
72
  additional_info.append("<annotation>screen.interaction_yes: None</annotation>")
73
 
74
- # Updated end_query string with clear explanation and example
75
- end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Below is an example:
76
-
77
- <annotation>indoors: 0</annotation>
78
- <annotation>standing: 1</annotation>
79
- <annotation>hands.free: None</annotation>
80
- <annotation>screen.interaction_yes: None</annotation>
 
 
 
 
 
 
 
 
 
81
  """
82
 
83
  final_query = query + " " + " ".join(additional_info)
@@ -89,7 +96,6 @@ def process_video_and_questions(video, sitting, hands, location, screen):
89
  return final_response
90
 
91
 
92
-
93
  def output_to_csv(final_response):
94
  # Parse the string to get the content
95
  parsed_content = parse_string(final_response, ["video_name", "annotation"])
 
6
  from src.utils import parse_string, parse_annotations
7
  import os
8
 
9
+
10
  # # --- Function to construct the final query ---
11
  # def process_video_and_questions(video, sitting, hands, location, screen):
12
  # # Extract the video name (filename)
 
16
  # query = f"Describe the video in detail and answer the questions"
17
  # additional_info = []
18
  # if sitting:
19
+ # additional_info.append("sitting/standing : Is the subject in the video standing or sitting?")
20
  # if hands:
21
+ # additional_info.append("hands_free: Is the subject holding any object in their hands, if so the hands are not free else they are free?")
22
  # if location:
23
+ # additional_info.append("indoors/outdoors: Is the subject present indoors or outdoors?")
24
  # if screen:
25
+ # additional_info.append("screen_interactions: Is the subject interacting with a screen in the background by facing the screen?")
26
 
27
  # end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Below is an example:
28
+ # <annotation>indoors: 0</annotation>
29
+ # <annotation>standing: 1</annotation>
30
+ # <annotation>hands.free: None</annotation>
31
+ # <annotation>screen.interaction_yes: None</annotation>
 
 
 
32
  # """
33
 
34
+ # final_query = query + " " + " ".join(additional_info)
35
+ # final_prompt = final_query + " " + end_query
 
36
 
37
+ # # Assuming your describe_video function handles the video processing
38
+ # response = describe_video(video, final_prompt)
39
+ # final_response = f"<video_name>{video_name}</video_name>" + " " + response
40
+ # return final_response
41
+
42
 
43
  def process_video_and_questions(video, sitting, hands, location, screen):
44
  # Extract the video name (filename)
 
69
  else:
70
  additional_info.append("<annotation>screen.interaction_yes: None</annotation>")
71
 
72
+ # Updated end_query with structured prompt
73
+ end_query = """
74
+ You're an AI assistant, and your goal is to provide the results of the video analysis in the correct format as described below:
75
+
76
+ <annotations>
77
+ - Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present.
78
+ - Use <annotation> tags for each attribute like indoors, standing, hands.free, and screen.interaction_yes.
79
+ </annotations>
80
+
81
+ <example>
82
+ - Here's an example of the expected format:
83
+ <annotation>indoors: 0</annotation>
84
+ <annotation>standing: 1</annotation>
85
+ <annotation>hands.free: None</annotation>
86
+ <annotation>screen.interaction_yes: None</annotation>
87
+ </example>
88
  """
89
 
90
  final_query = query + " " + " ".join(additional_info)
 
96
  return final_response
97
 
98
 
 
99
  def output_to_csv(final_response):
100
  # Parse the string to get the content
101
  parsed_content = parse_string(final_response, ["video_name", "annotation"])