merve HF staff commited on
Commit
08bcb47
1 Parent(s): db8a6e8

Added back parameters

Browse files
Files changed (1) hide show
  1. app.py +76 -16
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import os
2
  import string
3
 
@@ -17,11 +19,9 @@ quantization_config = BitsAndBytesConfig(
17
  pipe = pipeline("image-to-text", model=model_id, model_kwargs={"quantization_config": quantization_config})
18
 
19
 
20
-
21
  def extract_response_pairs(text):
22
  pattern = re.compile(r'(USER:.*?)ASSISTANT:(.*?)(?:$|USER:)', re.DOTALL)
23
  matches = pattern.findall(text)
24
- print(matches)
25
 
26
  pairs = [(user.strip(), assistant.strip()) for user, assistant in matches]
27
 
@@ -35,19 +35,26 @@ def postprocess_output(output: str) -> str:
35
 
36
 
37
 
38
- def chat(image, text, max_length, history_chat):
 
 
39
 
40
- prompt = " ".join(history_chat) + f"USER: <image>\n{text}\nASSISTANT:"
 
41
 
42
  outputs = pipe(image, prompt=prompt,
43
- generate_kwargs={
44
- "max_length":max_length})
 
 
 
 
 
45
 
46
- #output = postprocess_output(outputs[0]["generated_text"])
47
- history_chat.append(outputs[0]["generated_text"])
48
 
49
  chat_val = extract_response_pairs(" ".join(history_chat))
50
-
51
  return chat_val, history_chat
52
 
53
 
@@ -60,33 +67,81 @@ css = """
60
  """
61
  with gr.Blocks(css="style.css") as demo:
62
  gr.Markdown(DESCRIPTION)
63
- gr.Markdown("LLaVA is now available in transformers with 4-bit quantization ⚡️")
 
 
64
  chatbot = gr.Chatbot(label="Chat", show_label=False)
65
- gr.Markdown("Input image and text to start chatting 👇 ")
66
  with gr.Row():
 
67
  image = gr.Image(type="pil")
68
- text_input = gr.Text(label="Chat Input", max_lines=1)
 
 
69
 
70
  history_chat = gr.State(value=[])
71
  with gr.Row():
72
  clear_chat_button = gr.Button("Clear")
73
  chat_button = gr.Button("Submit", variant="primary")
74
  with gr.Accordion(label="Advanced settings", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  max_length = gr.Slider(
76
  label="Max Length",
77
  minimum=1,
78
- maximum=200,
79
  step=1,
80
- value=150,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  )
82
-
83
  chat_output = [
84
  chatbot,
85
  history_chat
86
  ]
87
  chat_button.click(fn=chat, inputs=[image,
88
  text_input,
 
 
 
89
  max_length,
 
 
90
  history_chat],
91
  outputs=chat_output,
92
  api_name="Chat",
@@ -95,7 +150,12 @@ with gr.Blocks(css="style.css") as demo:
95
  chat_inputs = [
96
  image,
97
  text_input,
 
 
 
98
  max_length,
 
 
99
  history_chat
100
  ]
101
  text_input.submit(
@@ -130,4 +190,4 @@ with gr.Blocks(css="style.css") as demo:
130
 
131
 
132
  if __name__ == "__main__":
133
- demo.queue(max_size=10).launch(debug=True)
 
1
+ from __future__ import annotations
2
+
3
  import os
4
  import string
5
 
 
19
  pipe = pipeline("image-to-text", model=model_id, model_kwargs={"quantization_config": quantization_config})
20
 
21
 
 
22
  def extract_response_pairs(text):
23
  pattern = re.compile(r'(USER:.*?)ASSISTANT:(.*?)(?:$|USER:)', re.DOTALL)
24
  matches = pattern.findall(text)
 
25
 
26
  pairs = [(user.strip(), assistant.strip()) for user, assistant in matches]
27
 
 
35
 
36
 
37
 
38
def chat(image, text, temperature, length_penalty,
         repetition_penalty, max_length, min_length, top_p,
         history_chat, num_beams=1):
    """Run one chat turn against the LLaVA image-to-text pipeline.

    Args:
        image: PIL image from the Gradio image input.
        text: the user's message for this turn.
        temperature, length_penalty, repetition_penalty, max_length,
        min_length, top_p: generation settings forwarded to the pipeline's
            ``generate_kwargs``.
        history_chat: Gradio ``gr.State`` list of raw generated strings from
            previous turns; mutated in place (appended to).
        num_beams: beam-search width (default 1). Kept as a trailing
            defaulted parameter because the Gradio ``inputs`` wirings in this
            file pass exactly nine positional values without a num_beams
            component — with the old ``(…, num_beams, top_p, history_chat)``
            order those calls misaligned and raised a TypeError.

    Returns:
        (chat_val, history_chat): the (user, assistant) pairs for the
        Chatbot component, and the updated raw history for ``gr.State``.
    """
    # BUG FIX: the previous revision assigned prompt twice, so the joined
    # history was immediately overwritten and every turn lost its context.
    # Prepend the accumulated history to the new USER turn instead.
    prompt = " ".join(history_chat) + f"USER: <image>\n{text}\nASSISTANT:"

    outputs = pipe(image, prompt=prompt,
                   generate_kwargs={"temperature": temperature,
                                    "length_penalty": length_penalty,
                                    "repetition_penalty": repetition_penalty,
                                    "max_length": max_length,
                                    "min_length": min_length,
                                    "num_beams": num_beams,
                                    "top_p": top_p})

    # Strip the echoed prompt/markers before storing the turn.
    output = postprocess_output(outputs[0]["generated_text"])
    history_chat.append(output)

    # Re-derive the (user, assistant) pairs the Chatbot widget displays.
    chat_val = extract_response_pairs(" ".join(history_chat))
    return chat_val, history_chat
59
 
60
 
 
67
  """
68
  with gr.Blocks(css="style.css") as demo:
69
  gr.Markdown(DESCRIPTION)
70
+ gr.Markdown("**LLaVA, one of the greatest multimodal chat models is now available in transformers with 4-bit quantization! ⚡️ **")
71
+ gr.Markdown("**Try it in this demo 🤗 **")
72
+
73
  chatbot = gr.Chatbot(label="Chat", show_label=False)
74
+ gr.Markdown("Input image and text and start chatting 👇")
75
  with gr.Row():
76
+
77
  image = gr.Image(type="pil")
78
+ text_input = gr.Text(label="Chat Input", show_label=False, max_lines=3, container=False)
79
+
80
+
81
 
82
  history_chat = gr.State(value=[])
83
  with gr.Row():
84
  clear_chat_button = gr.Button("Clear")
85
  chat_button = gr.Button("Submit", variant="primary")
86
  with gr.Accordion(label="Advanced settings", open=False):
87
+ temperature = gr.Slider(
88
+ label="Temperature",
89
+ info="Used with nucleus sampling.",
90
+ minimum=0.5,
91
+ maximum=1.0,
92
+ step=0.1,
93
+ value=1.0,
94
+ )
95
+ length_penalty = gr.Slider(
96
+ label="Length Penalty",
97
+ info="Set to larger for longer sequence, used with beam search.",
98
+ minimum=-1.0,
99
+ maximum=2.0,
100
+ step=0.2,
101
+ value=1.0,
102
+ )
103
+ repetition_penalty = gr.Slider(
104
+ label="Repetition Penalty",
105
+ info="Larger value prevents repetition.",
106
+ minimum=1.0,
107
+ maximum=5.0,
108
+ step=0.5,
109
+ value=1.5,
110
+ )
111
  max_length = gr.Slider(
112
  label="Max Length",
113
  minimum=1,
114
+ maximum=512,
115
  step=1,
116
+ value=50,
117
+ )
118
+ min_length = gr.Slider(
119
+ label="Minimum Length",
120
+ minimum=1,
121
+ maximum=100,
122
+ step=1,
123
+ value=1,
124
+ )
125
+ top_p = gr.Slider(
126
+ label="Top P",
127
+ info="Used with nucleus sampling.",
128
+ minimum=0.5,
129
+ maximum=1.0,
130
+ step=0.1,
131
+ value=0.9,
132
  )
 
133
  chat_output = [
134
  chatbot,
135
  history_chat
136
  ]
137
  chat_button.click(fn=chat, inputs=[image,
138
  text_input,
139
+ temperature,
140
+ length_penalty,
141
+ repetition_penalty,
142
  max_length,
143
+ min_length,
144
+ top_p,
145
  history_chat],
146
  outputs=chat_output,
147
  api_name="Chat",
 
150
  chat_inputs = [
151
  image,
152
  text_input,
153
+ temperature,
154
+ length_penalty,
155
+ repetition_penalty,
156
  max_length,
157
+ min_length,
158
+ top_p,
159
  history_chat
160
  ]
161
  text_input.submit(
 
190
 
191
 
192
  if __name__ == "__main__":
193
+ demo.queue(max_size=10).launch()