momergul committed on
Commit 18e7d92
1 Parent(s): 33f8437
Files changed (1)
  1. app.py +12 -10
app.py CHANGED
@@ -21,9 +21,9 @@ css="""
 """
 
 def initialize_game() -> List[List[str]]:
-    context_dicts = [generate_complete_game() for _ in range(4)]
+    context_dicts = [generate_complete_game() for _ in range(2)]
 
-    roles = ["speaker"] * 3 + ["listener"] * 3 + ["speaker"] * 3 + ["listener"] * 3
+    roles = ["speaker"] * 3 + ["listener"] * 3
     speaker_images = []
     listener_images = []
     targets = []
@@ -36,7 +36,7 @@ def initialize_game() -> List[List[str]]:
 
     return list(zip(speaker_images, listener_images, targets, roles))
 
-@spaces.GPU(duration=120)
+@spaces.GPU
 def get_model_response(
     model, adapter_name, processor, index_to_token, role: str,
     image_paths: List[str], user_message: str = "", target_image: str = ""
@@ -48,14 +48,16 @@ def get_model_response(
         input_tokens, attn_mask, images, image_attn_mask, label = joint_speaker_input(
             processor, image_paths, target_image, model.get_listener().device
         )
+        print("Hi")
         with torch.no_grad():
             image_paths = [image_paths]
             captions, _, _, _, _ = model.generate(
                 images, input_tokens, attn_mask, image_attn_mask, label,
                 image_paths, processor, img_dir, index_to_token,
                 max_steps=30, sampling_type="nucleus", temperature=0.7,
-                top_k=50, top_p=1, repetition_penalty=1, num_samples=10
-            )
+                top_k=50, top_p=1, repetition_penalty=1, num_samples=5
+            )
+        print("There")
         response = captions[0]
     else: # listener
         images, l_input_tokens, l_attn_mask, l_image_attn_mask, s_input_tokens, s_attn_mask, \
@@ -88,13 +90,13 @@ def interaction(model, processor, index_to_token, model_iteration: str) -> Tuple
     if model_role == "speaker":
         human_role = "Listener"
         turn += 1
-        turn_message = f"{turn}/12"
+        turn_message = f"{turn}/6"
         human_context = listener_image
         model_context = speaker_image
         target_idx = human_context.index(target_image)
 
         conversation.extend([
-            f"TURN: {turn}/12",
+            f"TURN: {turn}/6",
             f"Guess the target image given the speaker's description. ",
         ])
         model_message = get_model_response(internal_model, adapter_name, processor, index_to_token, model_role, model_context, target_image=target_image)
@@ -112,13 +114,13 @@ def interaction(model, processor, index_to_token, model_iteration: str) -> Tuple
         # listener
         human_role = "Speaker"
         turn += 1
-        turn_message = f"{turn}/12"
+        turn_message = f"{turn}/6"
        human_context = speaker_image
         model_context = listener_image
         target_idx = human_context.index(target_image)
 
         conversation.extend([
-            f"TURN: {turn}/12",
+            f"TURN: {turn}/6",
             f"Generate a description for the target image. Your target is Image {target_idx + 1}",
         ])
 
@@ -143,7 +145,7 @@ def create_app():
     gr.Markdown(
         '### You will be playing a sequence of reference games against a model. To start a game, first select whether ' +\
         'you wish to play against our initial trained model ("Initial System") or our model at the end of deployment ("Final System") ' +\
-        'and press the "Start Game" button. There will be 12 rounds of reference games. You will take on a "listener" or a "speaker" role at each round.'
+        'and press the "Start Game" button. There will be 6 rounds of reference games. You will take on a "listener" or a "speaker" role at each round.'
     )
 
     gr.Markdown(