merve (HF staff) committed
Commit 11e466e · Parent(s): 94445fb

Update app.py

Files changed (1):
  1. app.py +15 -12

app.py CHANGED
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 import os
 import string
 
@@ -19,6 +17,7 @@ quantization_config = BitsAndBytesConfig(
 pipe = pipeline("image-to-text", model=model_id, model_kwargs={"quantization_config": quantization_config})
 
 
+
 def extract_response_pairs(text):
     pattern = re.compile(r'(USER:.*?)ASSISTANT:(.*?)(?:$|USER:)', re.DOTALL)
     matches = pattern.findall(text)
@@ -36,11 +35,10 @@ def postprocess_output(output: str) -> str:
 
 
 def chat(image, text, temperature, length_penalty,
-         repetition_penalty, max_length, min_length, num_beams, top_p,
+         repetition_penalty, max_length, min_length, top_p,
          history_chat):
 
-    prompt = " ".join(history_chat)
-    prompt = f"USER: <image>\n{text}\nASSISTANT:"
+    prompt = " ".join(history_chat) + f"USER: <image>\n{text}\nASSISTANT:"
 
     outputs = pipe(image, prompt=prompt,
                    generate_kwargs={"temperature": temperature,
@@ -48,13 +46,14 @@ def chat(image, text, temperature, length_penalty,
                                     "repetition_penalty": repetition_penalty,
                                     "max_length": max_length,
                                     "min_length": min_length,
-                                    "num_beams": num_beams,
                                     "top_p": top_p})
 
-    output = postprocess_output(outputs[0]["generated_text"])
-    history_chat.append(output)
+
+    history_chat.append(outputs[0]["generated_text"])
+    print(f"history_chat is {history_chat}")
 
     chat_val = extract_response_pairs(" ".join(history_chat))
+    print(f"chat_val is {chat_val}")
     return chat_val, history_chat
 
 
@@ -67,8 +66,8 @@ css = """
 """
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
-    gr.Markdown("**LLaVA, one of the greatest multimodal chat models is now available in transformers with 4-bit quantization! ⚡️**")
-    gr.Markdown("**Try it in this demo 🤗**")
+    gr.Markdown("## LLaVA, one of the greatest multimodal chat models, is now available in transformers with 4-bit quantization! ⚡️")
+    gr.Markdown("## Try 4-bit quantized LLaVA in this demo 🤗")
 
     chatbot = gr.Chatbot(label="Chat", show_label=False)
     gr.Markdown("Input image and text and start chatting 👇")
@@ -185,8 +184,12 @@ with gr.Blocks(css="style.css") as demo:
                      chatbot,
                      history_chat
                  ],
-                 queue=False,
-    )
+                 queue=False)
+
+    examples = [["/content/baklava.png", "How to make this pastry?"], ["/content/bee.png", "Describe this image."]]
+    gr.Examples(examples=examples, inputs=[image, text_input, chat_inputs])
+
+
 
 
 if __name__ == "__main__":
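
The second hunk's header references `quantization_config = BitsAndBytesConfig(` without showing its body. A minimal sketch of how such a 4-bit pipeline is typically wired up in transformers follows; the config arguments and the `model_id` value are assumptions for illustration, not taken from this commit:

import torch
from transformers import BitsAndBytesConfig, pipeline

# Assumed 4-bit config; the actual arguments used in app.py are not
# visible in this diff.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# model_id is defined above the shown hunks; this value is a guess
# consistent with the LLaVA demo text.
model_id = "llava-hf/llava-1.5-7b-hf"
pipe = pipeline(
    "image-to-text",
    model=model_id,
    model_kwargs={"quantization_config": quantization_config},
)

Passing the config through `model_kwargs` forwards it to `from_pretrained`, so the weights are quantized as they load rather than afterwards.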
 
 
 
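Only the first lines of `extract_response_pairs` appear in the diff, and the regex is the interesting part; it can be exercised standalone. A quick check on a made-up transcript (the wrapper below returns the matches directly, unlike the fuller helper in app.py):

import re

def extract_response_pairs(text):
    # Lazily capture each USER turn and the ASSISTANT reply after it,
    # stopping at end-of-string or at the next "USER:".
    pattern = re.compile(r'(USER:.*?)ASSISTANT:(.*?)(?:$|USER:)', re.DOTALL)
    return pattern.findall(text)

transcript = ("USER: <image>\nWhat is in this image? ASSISTANT: A bee on a flower. "
              "USER: What color is the flower? ASSISTANT: Pink.")

for user, assistant in extract_response_pairs(transcript):
    print(repr(user.strip()), "->", repr(assistant.strip()))
# Prints only the first pair: "(?:$|USER:)" consumes the second "USER:",
# so findall cannot start a new match on that exchange.

A lookahead, `(?=USER:|$)`, instead of the consuming group would let consecutive exchanges all match.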
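The substantive fix in `chat` is the prompt line: the old code built `" ".join(history_chat)` and then immediately overwrote it with the bare f-string, so every turn started a fresh conversation. A small sketch of what the corrected concatenation produces (no model needed; the history value is illustrative):

def build_prompt(history_chat, text):
    # Corrected logic from the diff: join the prior turns, then append the
    # new USER turn so the model sees the whole conversation.
    return " ".join(history_chat) + f"USER: <image>\n{text}\nASSISTANT:"

history = []
print(build_prompt(history, "What is in this image?"))
# First turn: just "USER: <image>\n...\nASSISTANT:"

history.append("USER: <image>\nWhat is in this image? ASSISTANT: A bee on a flower.")
print(build_prompt(history, "What color is the flower?"))
# Second turn: the first exchange now precedes the new USER turn.

One caveat the commit leaves in place: the appended turn follows the joined history with no separating space ("...flower.USER: <image>..."), which the pairing regex happens to tolerate.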