Spaces:
Sleeping
Sleeping
momergul
committed on
Commit
•
18e7d92
1
Parent(s):
33f8437
Update
Browse files
app.py
CHANGED
@@ -21,9 +21,9 @@ css="""
|
|
21 |
"""
|
22 |
|
23 |
def initialize_game() -> List[List[str]]:
|
24 |
-
context_dicts = [generate_complete_game() for _ in range(
|
25 |
|
26 |
-
roles = ["speaker"] * 3 + ["listener"] * 3
|
27 |
speaker_images = []
|
28 |
listener_images = []
|
29 |
targets = []
|
@@ -36,7 +36,7 @@ def initialize_game() -> List[List[str]]:
|
|
36 |
|
37 |
return list(zip(speaker_images, listener_images, targets, roles))
|
38 |
|
39 |
-
@spaces.GPU
|
40 |
def get_model_response(
|
41 |
model, adapter_name, processor, index_to_token, role: str,
|
42 |
image_paths: List[str], user_message: str = "", target_image: str = ""
|
@@ -48,14 +48,16 @@ def get_model_response(
|
|
48 |
input_tokens, attn_mask, images, image_attn_mask, label = joint_speaker_input(
|
49 |
processor, image_paths, target_image, model.get_listener().device
|
50 |
)
|
|
|
51 |
with torch.no_grad():
|
52 |
image_paths = [image_paths]
|
53 |
captions, _, _, _, _ = model.generate(
|
54 |
images, input_tokens, attn_mask, image_attn_mask, label,
|
55 |
image_paths, processor, img_dir, index_to_token,
|
56 |
max_steps=30, sampling_type="nucleus", temperature=0.7,
|
57 |
-
top_k=50, top_p=1, repetition_penalty=1, num_samples=
|
58 |
-
)
|
|
|
59 |
response = captions[0]
|
60 |
else: # listener
|
61 |
images, l_input_tokens, l_attn_mask, l_image_attn_mask, s_input_tokens, s_attn_mask, \
|
@@ -88,13 +90,13 @@ def interaction(model, processor, index_to_token, model_iteration: str) -> Tuple
|
|
88 |
if model_role == "speaker":
|
89 |
human_role = "Listener"
|
90 |
turn += 1
|
91 |
-
turn_message = f"{turn}/
|
92 |
human_context = listener_image
|
93 |
model_context = speaker_image
|
94 |
target_idx = human_context.index(target_image)
|
95 |
|
96 |
conversation.extend([
|
97 |
-
f"TURN: {turn}/
|
98 |
f"Guess the target image given the speaker's description. ",
|
99 |
])
|
100 |
model_message = get_model_response(internal_model, adapter_name, processor, index_to_token, model_role, model_context, target_image=target_image)
|
@@ -112,13 +114,13 @@ def interaction(model, processor, index_to_token, model_iteration: str) -> Tuple
|
|
112 |
# listener
|
113 |
human_role = "Speaker"
|
114 |
turn += 1
|
115 |
-
turn_message = f"{turn}/
|
116 |
human_context = speaker_image
|
117 |
model_context = listener_image
|
118 |
target_idx = human_context.index(target_image)
|
119 |
|
120 |
conversation.extend([
|
121 |
-
f"TURN: {turn}/
|
122 |
f"Generate a description for the target image. Your target is Image {target_idx + 1}",
|
123 |
])
|
124 |
|
@@ -143,7 +145,7 @@ def create_app():
|
|
143 |
gr.Markdown(
|
144 |
'### You will be playing a sequence of reference games against a model. To start a game, first select whether ' +\
|
145 |
'you wish to play against our initial trained model ("Initial System") or our model at the end of deployment ("Final System") ' +\
|
146 |
-
'and press the "Start Game" button. There will be
|
147 |
)
|
148 |
|
149 |
gr.Markdown(
|
|
|
21 |
"""
|
22 |
|
23 |
def initialize_game() -> List[List[str]]:
|
24 |
+
context_dicts = [generate_complete_game() for _ in range(2)]
|
25 |
|
26 |
+
roles = ["speaker"] * 3 + ["listener"] * 3
|
27 |
speaker_images = []
|
28 |
listener_images = []
|
29 |
targets = []
|
|
|
36 |
|
37 |
return list(zip(speaker_images, listener_images, targets, roles))
|
38 |
|
39 |
+
@spaces.GPU
|
40 |
def get_model_response(
|
41 |
model, adapter_name, processor, index_to_token, role: str,
|
42 |
image_paths: List[str], user_message: str = "", target_image: str = ""
|
|
|
48 |
input_tokens, attn_mask, images, image_attn_mask, label = joint_speaker_input(
|
49 |
processor, image_paths, target_image, model.get_listener().device
|
50 |
)
|
51 |
+
print("Hi")
|
52 |
with torch.no_grad():
|
53 |
image_paths = [image_paths]
|
54 |
captions, _, _, _, _ = model.generate(
|
55 |
images, input_tokens, attn_mask, image_attn_mask, label,
|
56 |
image_paths, processor, img_dir, index_to_token,
|
57 |
max_steps=30, sampling_type="nucleus", temperature=0.7,
|
58 |
+
top_k=50, top_p=1, repetition_penalty=1, num_samples=5
|
59 |
+
)
|
60 |
+
print("There")
|
61 |
response = captions[0]
|
62 |
else: # listener
|
63 |
images, l_input_tokens, l_attn_mask, l_image_attn_mask, s_input_tokens, s_attn_mask, \
|
|
|
90 |
if model_role == "speaker":
|
91 |
human_role = "Listener"
|
92 |
turn += 1
|
93 |
+
turn_message = f"{turn}/6"
|
94 |
human_context = listener_image
|
95 |
model_context = speaker_image
|
96 |
target_idx = human_context.index(target_image)
|
97 |
|
98 |
conversation.extend([
|
99 |
+
f"TURN: {turn}/6",
|
100 |
f"Guess the target image given the speaker's description. ",
|
101 |
])
|
102 |
model_message = get_model_response(internal_model, adapter_name, processor, index_to_token, model_role, model_context, target_image=target_image)
|
|
|
114 |
# listener
|
115 |
human_role = "Speaker"
|
116 |
turn += 1
|
117 |
+
turn_message = f"{turn}/6"
|
118 |
human_context = speaker_image
|
119 |
model_context = listener_image
|
120 |
target_idx = human_context.index(target_image)
|
121 |
|
122 |
conversation.extend([
|
123 |
+
f"TURN: {turn}/6",
|
124 |
f"Generate a description for the target image. Your target is Image {target_idx + 1}",
|
125 |
])
|
126 |
|
|
|
145 |
gr.Markdown(
|
146 |
'### You will be playing a sequence of reference games against a model. To start a game, first select whether ' +\
|
147 |
'you wish to play against our initial trained model ("Initial System") or our model at the end of deployment ("Final System") ' +\
|
148 |
+
'and press the "Start Game" button. There will be 6 rounds of reference games. You will take on a "listener" or a "speaker" role at each round.'
|
149 |
)
|
150 |
|
151 |
gr.Markdown(
|