Kr08 committed on
Commit
26a2377
1 Parent(s): ce6d0cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -6
app.py CHANGED
@@ -1,16 +1,35 @@
1
  import spaces
2
  import gradio as gr
3
 
4
- from airllm import HuggingFaceModelLoader, AutoModelForCausalLM
 
 
 
 
 
 
 
5
 
6
- model_loader = HuggingFaceModelLoader("meta-llama/Meta-Llama-3-8B-Instruct")
7
- model = AutoModelForCausalLM.from_pretrained(model_loader)
8
 
9
  @spaces.GPU
10
  def generate_text(input_text):
11
- input_ids = model.tokenizer.encode(input_text, return_tensors="pt")
12
- output = model.generate(input_ids, max_length=100)
13
- return model.tokenizer.decode(output[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
  iface = gr.Interface(
 
1
  import spaces
2
  import gradio as gr
3
 
4
+ # from airllm import HuggingFaceModelLoader, AutoModelForCausalLM
5
+
6
+ from airllm import AutoModel
7
+ import mlx.core as mx
8
+
9
+ model = AutoModel("meta-llama/Meta-Llama-3-8B-Instruct")
10
+ # model = AutoModel.from_pretrained(model_loader)
11
+ MAX_LENGTH = 128
12
 
 
 
13
 
14
  @spaces.GPU
15
  def generate_text(input_text):
16
+
17
+ input_tokens = model.tokenizer(input_text,
18
+ return_tensors="np",
19
+ return_attention_mask=False,
20
+ truncation=True,
21
+ max_length=MAX_LENGTH,
22
+ padding=False)
23
+
24
+
25
+ output = model.generate(mx.array(input_tokens['input_ids']),
26
+ max_new_tokens=20,
27
+ use_cache=True,
28
+ return_dict_in_generate=True)
29
+ # input_ids = model.tokenizer.encode(input_text, return_tensors="np")
30
+ # output = model.generate(input_ids, max_length=100)
31
+ # return model.tokenizer.decode(output[0])
32
+ return output
33
 
34
 
35
  iface = gr.Interface(