alfredplpl committed on
Commit
323947b
·
verified ·
1 Parent(s): f80a9e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -45,7 +45,7 @@ h1 {
45
 
46
  # Load the tokenizer and model
47
  tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0")
48
- model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0", device_map="auto", torch_dtype=torch.bfloat16)
49
  #model=model.eval()
50
 
51
  @spaces.GPU()
@@ -72,7 +72,7 @@ def chat_llama3_8b(message: str,
72
 
73
  input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
74
 
75
- streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
76
  generate_kwargs = dict(
77
  input_ids= input_ids,
78
  max_new_tokens=max_new_tokens,
 
45
 
46
  # Load the tokenizer and model
47
  tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0")
48
+ model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0", device_map="auto", load_in_8bit=True)
49
  #model=model.eval()
50
 
51
  @spaces.GPU()
 
72
 
73
  input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
74
 
75
+ streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
76
  generate_kwargs = dict(
77
  input_ids= input_ids,
78
  max_new_tokens=max_new_tokens,