AIO_Chat

Runtime error

eswardivi committed on Apr 19, 2024

Commit

8ea3940

verified ·

1 Parent(s): 2cdab2a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ import time
 token = os.environ["HF_TOKEN"]
 quantization_config = BitsAndBytesConfig(
-    load_in_8bit=True, bnb_4bit_compute_dtype=torch.float16
 )
 model = AutoModelForCausalLM.from_pretrained(
@@ -100,6 +100,6 @@ demo = gr.ChatInterface(
     ],
     stop_btn="Stop Generation",
     title="Chat With LLMs",
-    description="Now Running [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) in 8bit"
 )
 demo.launch()

 token = os.environ["HF_TOKEN"]
 quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
 )
 model = AutoModelForCausalLM.from_pretrained(
     ],
     stop_btn="Stop Generation",
     title="Chat With LLMs",
+    description="Now Running [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) in 4bit"
 )
 demo.launch()