Spaces:
Sleeping
Sleeping
momergul
committed on
Commit
•
18e7d92
1
Parent(s):
33f8437
Update
Browse files
app.py
CHANGED
@@ -21,9 +21,9 @@ css="""
|
|
21 |
"""
|
22 |
|
23 |
def initialize_game() -> List[List[str]]:
|
24 |
-
context_dicts = [generate_complete_game() for _ in range(
|
25 |
|
26 |
-
roles = ["speaker"] * 3 + ["listener"] * 3
|
27 |
speaker_images = []
|
28 |
listener_images = []
|
29 |
targets = []
|
@@ -36,7 +36,7 @@ def initialize_game() -> List[List[str]]:
|
|
36 |
|
37 |
return list(zip(speaker_images, listener_images, targets, roles))
|
38 |
|
39 |
-
@spaces.GPU
|
40 |
def get_model_response(
|
41 |
model, adapter_name, processor, index_to_token, role: str,
|
42 |
image_paths: List[str], user_message: str = "", target_image: str = ""
|
@@ -48,14 +48,16 @@ def get_model_response(
|
|
48 |
input_tokens, attn_mask, images, image_attn_mask, label = joint_speaker_input(
|
49 |
processor, image_paths, target_image, model.get_listener().device
|
50 |
)
|
|
|
51 |
with torch.no_grad():
|
52 |
image_paths = [image_paths]
|
53 |
captions, _, _, _, _ = model.generate(
|
54 |
images, input_tokens, attn_mask, image_attn_mask, label,
|
55 |
image_paths, processor, img_dir, index_to_token,
|
56 |
max_steps=30, sampling_type="nucleus", temperature=0.7,
|
57 |
-
top_k=50, top_p=1, repetition_penalty=1, num_samples=
|
58 |
-
)
|
|
|
59 |
response = captions[0]
|
60 |
else: # listener
|
61 |
images, l_input_tokens, l_attn_mask, l_image_attn_mask, s_input_tokens, s_attn_mask, \
|
@@ -88,13 +90,13 @@ def interaction(model, processor, index_to_token, model_iteration: str) -> Tuple
|
|
88 |
if model_role == "speaker":
|
89 |
human_role = "Listener"
|
90 |
turn += 1
|
91 |
-
turn_message = f"{turn}/
|
92 |
human_context = listener_image
|
93 |
model_context = speaker_image
|
94 |
target_idx = human_context.index(target_image)
|
95 |
|
96 |
conversation.extend([
|
97 |
-
f"TURN: {turn}/
|
98 |
f"Guess the target image given the speaker's description. ",
|
99 |
])
|
100 |
model_message = get_model_response(internal_model, adapter_name, processor, index_to_token, model_role, model_context, target_image=target_image)
|
@@ -112,13 +114,13 @@ def interaction(model, processor, index_to_token, model_iteration: str) -> Tuple
|
|
112 |
# listener
|
113 |
human_role = "Speaker"
|
114 |
turn += 1
|
115 |
-
turn_message = f"{turn}/
|
116 |
human_context = speaker_image
|
117 |
model_context = listener_image
|
118 |
target_idx = human_context.index(target_image)
|
119 |
|
120 |
conversation.extend([
|
121 |
-
f"TURN: {turn}/
|
122 |
f"Generate a description for the target image. Your target is Image {target_idx + 1}",
|
123 |
])
|
124 |
|
@@ -143,7 +145,7 @@ def create_app():
|
|
143 |
gr.Markdown(
|
144 |
'### You will be playing a sequence of reference games against a model. To start a game, first select whether ' +\
|
145 |
'you wish to play against our initial trained model ("Initial System") or our model at the end of deployment ("Final System") ' +\
|
146 |
-
'and press the "Start Game" button. There will be
|
147 |
)
|
148 |
|
149 |
gr.Markdown(
|
|
|
21 |
"""
|
22 |
|
23 |
def initialize_game() -> List[List[str]]:
|
24 |
+
context_dicts = [generate_complete_game() for _ in range(2)]
|
25 |
|
26 |
+
roles = ["speaker"] * 3 + ["listener"] * 3
|
27 |
speaker_images = []
|
28 |
listener_images = []
|
29 |
targets = []
|
|
|
36 |
|
37 |
return list(zip(speaker_images, listener_images, targets, roles))
|
38 |
|
39 |
+
@spaces.GPU
|
40 |
def get_model_response(
|
41 |
model, adapter_name, processor, index_to_token, role: str,
|
42 |
image_paths: List[str], user_message: str = "", target_image: str = ""
|
|
|
48 |
input_tokens, attn_mask, images, image_attn_mask, label = joint_speaker_input(
|
49 |
processor, image_paths, target_image, model.get_listener().device
|
50 |
)
|
51 |
+
print("Hi")
|
52 |
with torch.no_grad():
|
53 |
image_paths = [image_paths]
|
54 |
captions, _, _, _, _ = model.generate(
|
55 |
images, input_tokens, attn_mask, image_attn_mask, label,
|
56 |
image_paths, processor, img_dir, index_to_token,
|
57 |
max_steps=30, sampling_type="nucleus", temperature=0.7,
|
58 |
+
top_k=50, top_p=1, repetition_penalty=1, num_samples=5
|
59 |
+
)
|
60 |
+
print("There")
|
61 |
response = captions[0]
|
62 |
else: # listener
|
63 |
images, l_input_tokens, l_attn_mask, l_image_attn_mask, s_input_tokens, s_attn_mask, \
|
|
|
90 |
if model_role == "speaker":
|
91 |
human_role = "Listener"
|
92 |
turn += 1
|
93 |
+
turn_message = f"{turn}/6"
|
94 |
human_context = listener_image
|
95 |
model_context = speaker_image
|
96 |
target_idx = human_context.index(target_image)
|
97 |
|
98 |
conversation.extend([
|
99 |
+
f"TURN: {turn}/6",
|
100 |
f"Guess the target image given the speaker's description. ",
|
101 |
])
|
102 |
model_message = get_model_response(internal_model, adapter_name, processor, index_to_token, model_role, model_context, target_image=target_image)
|
|
|
114 |
# listener
|
115 |
human_role = "Speaker"
|
116 |
turn += 1
|
117 |
+
turn_message = f"{turn}/6"
|
118 |
human_context = speaker_image
|
119 |
model_context = listener_image
|
120 |
target_idx = human_context.index(target_image)
|
121 |
|
122 |
conversation.extend([
|
123 |
+
f"TURN: {turn}/6",
|
124 |
f"Generate a description for the target image. Your target is Image {target_idx + 1}",
|
125 |
])
|
126 |
|
|
|
145 |
gr.Markdown(
|
146 |
'### You will be playing a sequence of reference games against a model. To start a game, first select whether ' +\
|
147 |
'you wish to play against our initial trained model ("Initial System") or our model at the end of deployment ("Final System") ' +\
|
148 |
+
'and press the "Start Game" button. There will be 6 rounds of reference games. You will take on a "listener" or a "speaker" role at each round.'
|
149 |
)
|
150 |
|
151 |
gr.Markdown(
|