Spaces: perplexity
#2 by awais126 - opened
- README.md +2 -2
- perplexity.py +9 -11
- requirements.txt +1 -1
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🤗
 colorFrom: blue
 colorTo: red
 sdk: gradio
-sdk_version: 3.
+sdk_version: 3.0.2
 app_file: app.py
 pinned: false
 tags:
@@ -73,7 +73,7 @@ results = perplexity.compute(model_id='gpt2',
 print(list(results.keys()))
 >>>['perplexities', 'mean_perplexity']
 print(round(results["mean_perplexity"], 2))
->>>646.
+>>>646.74
 print(round(results["perplexities"][0], 2))
 >>>32.25
 ```
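For context, the README snippet being corrected corresponds to a call like the following. This is a minimal sketch assuming the evaluate library; the input_texts list is illustrative and not part of this diff, so the printed values will differ:

    # Load the perplexity metric and score a few illustrative strings with GPT-2.
    import evaluate

    perplexity = evaluate.load("perplexity", module_type="metric")
    input_texts = ["lorem ipsum", "Happy Birthday!", "Bitcoin is digital gold"]

    results = perplexity.compute(model_id="gpt2", predictions=input_texts)
    print(list(results.keys()))                  # ['perplexities', 'mean_perplexity']
    print(round(results["mean_perplexity"], 2))  # mean over all inputs
    print(round(results["perplexities"][0], 2))  # per-input perplexity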
perplexity.py CHANGED
@@ -63,10 +63,10 @@ Examples:
     ... predictions=input_texts) # doctest:+ELLIPSIS
     >>> print(list(results.keys()))
     ['perplexities', 'mean_perplexity']
-    >>> print(round(results["mean_perplexity"],
-
-    >>> print(round(results["perplexities"][0],
-
+    >>> print(round(results["mean_perplexity"], 2))
+    78.22
+    >>> print(round(results["perplexities"][0], 2))
+    11.11

     Example 2:
     >>> from datasets import load_dataset
@@ -100,9 +100,7 @@ class Perplexity(evaluate.Metric):
             reference_urls=["https://huggingface.co/docs/transformers/perplexity"],
         )

-    def _compute(
-        self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None
-    ):
+    def _compute(self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None):

         if device is not None:
             assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
@@ -128,20 +126,20 @@ class Perplexity(evaluate.Metric):
             # assign one of the special tokens to also be the pad token
             tokenizer.add_special_tokens({"pad_token": existing_special_tokens[0]})

-        if add_start_token
+        if add_start_token:
             # leave room for <BOS> token to be added:
             assert (
                 tokenizer.bos_token is not None
             ), "Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False"
-            max_tokenized_len = max_length - 1
+            max_tokenized_len = model.config.max_length - 1
         else:
-            max_tokenized_len = max_length
+            max_tokenized_len = model.config.max_length

         encodings = tokenizer(
             predictions,
             add_special_tokens=False,
             padding=True,
-            truncation=True
+            truncation=True,
             max_length=max_tokenized_len,
             return_tensors="pt",
             return_attention_mask=True,
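The substantive change in perplexity.py is where the truncation budget comes from: rather than a user-supplied max_length argument, it is now read from model.config.max_length, reserving one position for the <BOS> token when add_start_token=True. A standalone sketch of that branch (the helper name is hypothetical, not part of the diff):

    # Hypothetical helper isolating the truncation-budget logic above.
    def tokenized_length_budget(model_max_length: int, add_start_token: bool) -> int:
        if add_start_token:
            # One position is reserved so <BOS> can be prepended later.
            return model_max_length - 1
        return model_max_length

    assert tokenized_length_budget(1024, add_start_token=True) == 1023
    assert tokenized_length_budget(1024, add_start_token=False) == 1024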
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-git+https://github.com/huggingface/evaluate@
+git+https://github.com/huggingface/evaluate@4487d9d1e65216a36b4aa94e3396a570f44a1525
 torch
 torch
 transformers
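Pinning the git dependency to a full commit SHA makes the Space's build reproducible: the same requirement line can be installed directly with pip install "git+https://github.com/huggingface/evaluate@4487d9d1e65216a36b4aa94e3396a570f44a1525".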