Spaces:

alfredplpl
/

llm-jp-instruct-v2

Paused

alfredplpl committed on Apr 30, 2024

Commit

323947b

verified ·

1 Parent(s): f80a9e1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -45,7 +45,7 @@ h1 {
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0")
-model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0", device_map="auto", torch_dtype=torch.bfloat16)
 #model=model.eval()
 @spaces.GPU()
@@ -72,7 +72,7 @@ def chat_llama3_8b(message: str,
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
-    streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         input_ids= input_ids,
         max_new_tokens=max_new_tokens,

 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0")
+model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0", device_map="auto", load_in_8bit=True)
 #model=model.eval()
 @spaces.GPU()
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
+    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         input_ids= input_ids,
         max_new_tokens=max_new_tokens,