Update app.py
app.py CHANGED
@@ -8,6 +8,12 @@ from transformers import AutoProcessor, AutoModelForCausalLM
 import re
 from PIL import Image
 import io
+import json
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
 
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
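Note on the logging change above: `logging.basicConfig(level=logging.DEBUG)` raises the root logger to DEBUG, so every library in the process becomes verbose, not just app.py. If that is more output than intended, a minimal alternative sketch (an assumption on my part, not what this commit does) is to leave the root at INFO and raise only this module's logger:

```python
import logging

# Hypothetical alternative to the commit's basicConfig(level=DEBUG):
# keep third-party loggers at INFO, but let this module emit DEBUG.
logging.basicConfig(level=logging.INFO)   # root logger and its handler
logger = logging.getLogger(__name__)      # this module's logger
logger.setLevel(logging.DEBUG)            # only this module's records go down to DEBUG

logger.debug("visible: this module is at DEBUG")
logging.getLogger("urllib3").debug("suppressed: inherits the INFO root level")
```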
@@ -22,6 +28,7 @@ llm = Llama(
     ),
     n_ctx=2048,
     n_gpu_layers=100, # change n_gpu_layers if you have more or less VRAM
+    chat_format="llama-3",
 )
 
 
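The `chat_format="llama-3"` argument tells llama-cpp-python which built-in chat template `create_chat_completion` should apply when it turns a list of role/content messages into the model's prompt (the same Llama 3 header/eot tokens that appear in the stop list further down). A minimal sketch of the idea, with a placeholder model path rather than this Space's actual download logic:

```python
from llama_cpp import Llama

# Placeholder path; the real app downloads a GGUF file at startup.
llm = Llama(
    model_path="models/llama-3-8b-instruct.Q4_K_M.gguf",
    n_ctx=2048,
    n_gpu_layers=100,
    chat_format="llama-3",  # apply the built-in Llama 3 chat template
)

result = llm.create_chat_completion(
    [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    max_tokens=64,
)
print(result["choices"][0]["message"]["content"])
```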
@@ -63,12 +70,23 @@ def generate_text(
         yield picoutput
     else:
         temp = ""
-        system_prompt
+        # Create system_prompt as a dictionary
+        system_prompt = {"role": "system", "content": system_message}
+
+        # Create history_prompt as a list of dictionaries
+        history_prompt = []
         for interaction in history:
-
-
-
-
+            user_part = {"role": "user", "content": str(interaction[0])}
+            assistant_part = {"role": "assistant", "content": str(interaction[1])}
+            history_prompt.extend([user_part, assistant_part])
+
+        # Create user_input_part as a dictionary
+        user_input_part = {"role": "user", "content": str(in_text)}
+
+        # Construct input_prompt as a list of dictionaries
+        input_prompt = [system_prompt] + history_prompt + [user_input_part]
+        logger.debug(f"Input Prompt: {input_prompt}")
+        output = llm.create_chat_completion(
             input_prompt,
             temperature=temperature,
             top_p=top_p,
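To make the rewritten block concrete, here is the shape of `input_prompt` it produces for a hypothetical one-turn history (all values below are illustrative, not taken from the app):

```python
# Hypothetical inputs standing in for the Gradio callback's arguments.
system_message = "You are a concise assistant."
history = [("What is llama.cpp?", "A C/C++ inference engine for GGUF models.")]
in_text = "Does it support GPU offload?"

system_prompt = {"role": "system", "content": system_message}

history_prompt = []
for interaction in history:
    history_prompt.extend([
        {"role": "user", "content": str(interaction[0])},
        {"role": "assistant", "content": str(interaction[1])},
    ])

user_input_part = {"role": "user", "content": str(in_text)}
input_prompt = [system_prompt] + history_prompt + [user_input_part]

# input_prompt is now:
# [{'role': 'system',    'content': 'You are a concise assistant.'},
#  {'role': 'user',      'content': 'What is llama.cpp?'},
#  {'role': 'assistant', 'content': 'A C/C++ inference engine for GGUF models.'},
#  {'role': 'user',      'content': 'Does it support GPU offload?'}]
```

This is the OpenAI-style message list that `create_chat_completion` expects now that the model is constructed with `chat_format="llama-3"`.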
@@ -84,12 +102,16 @@ def generate_text(
                 "SYSTEM:",
                 "<|start_header_id|>",
                 "<|eot_id|>",
+                "<im_end>",
+                "<|im_end|>",
             ],
             stream=True,
         )
         for out in output:
             stream = copy.deepcopy(out)
-
+            logger.debug(f"Stream: {stream}")
+            if 'delta' in stream['choices'][0] and 'content' in stream['choices'][0]['delta']:
+                temp += stream["choices"][0]["delta"]["content"]
             yield temp
 
 
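The guard added before `temp += ...` exists because, with `stream=True`, `create_chat_completion` yields OpenAI-style chunks: the first delta usually carries only the role, the middle ones carry `content`, and the final one is empty apart from a finish reason, so indexing `delta["content"]` unconditionally raises a KeyError on some chunks. A standalone sketch of consuming the stream (assuming `llm` is the instance configured above; the prompt is a placeholder):

```python
# Accumulate streamed text the same way the updated loop does.
chunks = llm.create_chat_completion(
    [{"role": "user", "content": "Say hello in five words."}],
    stream=True,
)

text = ""
for chunk in chunks:
    delta = chunk["choices"][0].get("delta", {})
    # Skip chunks that carry no text (role-only first chunk, empty final chunk).
    if "content" in delta:
        text += delta["content"]
print(text)
```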