Update README.md
Browse files
README.md
CHANGED
@@ -41,6 +41,23 @@ She is curious and brave and
|
|
41 |
"""
|
42 |
```
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
|
46 |
### Evaluate the model
|
@@ -55,17 +72,17 @@ auto-round --eval --model Intel/phi-2-int4-inc --device cuda:0 --tasks lambada_o
|
|
55 |
|
56 |
| Metric | FP16 | INT4 |
|
57 |
| -------------- | ------ | ------ |
|
58 |
-
| Avg. | 0.6131 | 0.
|
59 |
-
| mmlu | 0.5334 | 0.
|
60 |
-
| lambada_openai | 0.6243 | 0.
|
61 |
-
| hellaswag | 0.5581 | 0.
|
62 |
-
| winogrande | 0.7522 | 0.
|
63 |
-
| piqa | 0.7867 | 0.
|
64 |
-
| truthfulqa_mc1 | 0.3097 | 0.
|
65 |
-
| openbookqa | 0.4040 | 0.
|
66 |
-
| boolq | 0.8346 | 0.
|
67 |
-
| arc_easy | 0.8001 | 0.
|
68 |
-
| arc_challenge | 0.5282 | 0.
|
69 |
|
70 |
|
71 |
|
|
|
41 |
"""
|
42 |
```
|
43 |
|
44 |
+
### Intel Gaudi-2 INT4 Inference
|
45 |
+
A Docker image with the Gaudi Software Stack is recommended. More details can be found in the [Gaudi Guide](https://docs.habana.ai/en/latest/).
|
46 |
+
```python
|
47 |
+
import habana_frameworks.torch.core as htcore
|
48 |
+
import habana_frameworks.torch.hpu as hthpu
|
49 |
+
|
50 |
+
from auto_round import AutoRoundConfig
|
51 |
+
import torch
from transformers import AutoModelForCausalLM,AutoTokenizer
|
52 |
+
|
53 |
+
quantized_model_dir = "Intel/phi-2-int4-inc"
|
54 |
+
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir)
|
55 |
+
model = AutoModelForCausalLM.from_pretrained(quantized_model_dir).to('hpu').to(torch.bfloat16)
|
56 |
+
text = "下面我来介绍一下阿里巴巴公司,"
|
57 |
+
inputs = tokenizer(text, return_tensors="pt").to(model.device)
|
58 |
+
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50, do_sample=False)[0]))
|
59 |
+
|
60 |
+
```
|
61 |
|
62 |
|
63 |
### Evaluate the model
|
|
|
72 |
|
73 |
| Metric | FP16 | INT4 |
|
74 |
| -------------- | ------ | ------ |
|
75 |
+
| Avg. | 0.6131 | 0.6087 |
|
76 |
+
| mmlu | 0.5334 | 0.5417 |
|
77 |
+
| lambada_openai | 0.6243 | 0.6088 |
|
78 |
+
| hellaswag | 0.5581 | 0.5520 |
|
79 |
+
| winogrande | 0.7522 | 0.7577 |
|
80 |
+
| piqa | 0.7867 | 0.7911 |
|
81 |
+
| truthfulqa_mc1 | 0.3097 | 0.2962 |
|
82 |
+
| openbookqa | 0.4040 | 0.3900 |
|
83 |
+
| boolq | 0.8346 | 0.8333 |
|
84 |
+
| arc_easy | 0.8001 | 0.7980 |
|
85 |
+
| arc_challenge | 0.5282 | 0.5179 |
|
86 |
|
87 |
|
88 |
|