Spaces:

ManishThota
/

GSoC-Super-Rapid-Annotator

Runtime error

App Files Community

ManishThota committed on Aug 21, 2024

Commit

f9f1c5f

verified ·

1 Parent(s): d22f2b1

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -61

app.py CHANGED Viewed

@@ -7,85 +7,85 @@ from src.utils import parse_string, parse_annotations
 import os
-# # --- Function to construct the final query ---
-# def process_video_and_questions(video, sitting, hands, location, screen):
-#     # Extract the video name (filename)
-#     video_name = os.path.basename(video)
-#     # Construct the query with the video name included
-#     query = f"Describe the video in detail and answer the questions"
-#     additional_info = []
-#     if sitting:
-#         additional_info.append("sitting/standing : Is the subject in the video standing or sitting?")
-#     if hands:
-#         additional_info.append("hands_free: Is the subject holding any object in their hands, if so the hands are not free else they are free?")
-#     if location:
-#         additional_info.append("indoors/outdoors: Is the subject present indoors or outdoors?")
-#     if screen:
-#         additional_info.append("screen_interactions: Is the subject interacting with a screen in the background by facing the screen?")
-#     end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Below is an example:
-#         <annotation>indoors: 0</annotation>
-#         <annotation>standing: 1</annotation>
-#         <annotation>hands.free: None</annotation>
-#         <annotation>screen.interaction_yes: None</annotation>
-#         """
-#     final_query = query + " " + " ".join(additional_info)
-#     final_prompt = final_query + " " + end_query
-#     # Assuming your describe_video function handles the video processing
-#     response = describe_video(video, final_prompt)
-#     final_response = f"<video_name>{video_name}</video_name>" + " " + response
-#     return final_response
 def process_video_and_questions(video, sitting, hands, location, screen):
     # Extract the video name (filename)
     video_name = os.path.basename(video)
     # Construct the query with the video name included
-    query = f"Describe the video in detail and answer the questions"
     additional_info = []
-    # Handle each checkbox option, including those not selected (None)
-    if sitting is not None:
         additional_info.append("Is the subject in the video standing or sitting?")
-    else:
-        additional_info.append("<annotation>standing: None</annotation>")
-    if hands is not None:
         additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
-    else:
-        additional_info.append("<annotation>hands.free: None</annotation>")
-    if location is not None:
         additional_info.append("Is the subject present indoors or outdoors?")
-    else:
-        additional_info.append("<annotation>indoors: None</annotation>")
-    if screen is not None:
         additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
-    else:
-        additional_info.append("<annotation>screen.interaction_yes: None</annotation>")
-    # Updated end_query with structured prompt
-    end_query = """
-    You're an AI assistant, and your goal is to provide the results of the video analysis in the correct format as described below:
-    <annotations>
-    - Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present.
-    - Use <annotation> tags for each attribute like indoors, standing, hands.free, and screen.interaction_yes.
-    </annotations>
-    """
     final_query = query + " " + " ".join(additional_info)
     final_prompt = final_query + " " + end_query
     # Assuming your describe_video function handles the video processing
     response = describe_video(video, final_prompt)
     final_response = f"<video_name>{video_name}</video_name>" + " " + response
-    return final_response
 def output_to_csv(final_response):

 import os
+# --- Function to construct the final query ---
 def process_video_and_questions(video, sitting, hands, location, screen):
     # Extract the video name (filename)
     video_name = os.path.basename(video)
     # Construct the query with the video name included
+    query = f"Answer the questions from the video\n"
     additional_info = []
+    if sitting:
         additional_info.append("Is the subject in the video standing or sitting?")
+    if hands:
         additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
+    if location:
         additional_info.append("Is the subject present indoors or outdoors?")
+    if screen:
         additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
+    end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the below examples\n:
+        <annotation>indoors: 0</annotation>
+        <annotation>standing: 1</annotation>
+        <annotation>hands.free: 0</annotation>
+        <annotation>screen.interaction_yes: 0</annotation>
+        """
     final_query = query + " " + " ".join(additional_info)
     final_prompt = final_query + " " + end_query
     # Assuming your describe_video function handles the video processing
     response = describe_video(video, final_prompt)
     final_response = f"<video_name>{video_name}</video_name>" + " " + response
+    return final_response
+# def process_video_and_questions(video, sitting, hands, location, screen):
+#     # Extract the video name (filename)
+#     video_name = os.path.basename(video)
+#     # Construct the query with the video name included
+#     query = f"Describe the video in detail and answer the questions"
+#     additional_info = []
+#     # Handle each checkbox option, including those not selected (None)
+    # if sitting is not None:
+    #     additional_info.append("Is the subject in the video standing or sitting?")
+    # else:
+    #     additional_info.append("<annotation>standing: None</annotation>")
+    # if hands is not None:
+    #     additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
+    # else:
+    #     additional_info.append("<annotation>hands.free: None</annotation>")
+    # if location is not None:
+    #     additional_info.append("Is the subject present indoors or outdoors?")
+    # else:
+    #     additional_info.append("<annotation>indoors: None</annotation>")
+    # if screen is not None:
+    #     additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
+    # else:
+    #     additional_info.append("<annotation>screen.interaction_yes: None</annotation>")
+    # # Updated end_query with structured prompt
+    # end_query = """
+    # You're an AI assistant, and your goal is to provide the results of the video analysis in the correct format as described below:
+    # <annotations>
+    # - Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present.
+    # - Use <annotation> tags for each attribute like indoors, standing, hands.free, and screen.interaction_yes.
+    # </annotations>
+    # """
+    # final_query = query + " " + " ".join(additional_info)
+    # final_prompt = final_query + " " + end_query
+    # # Assuming your describe_video function handles the video processing
+    # response = describe_video(video, final_prompt)
+    # final_response = f"<video_name>{video_name}</video_name>" + " " + response
+    # return final_response
 def output_to_csv(final_response):