d-matrix committed
Commit 76e1a38
1 Parent(s): b97e015

updated README

Files changed (3)
  1. README.md +39 -4
  2. perplexity.py +6 -6
  3. requirements.txt +1 -0
README.md CHANGED
@@ -29,11 +29,46 @@ It is defined as the exponentiated average negative log-likelihood of a sequence
 For more information, see https://huggingface.co/docs/transformers/perplexity
 
 ## How to Use
-At minimum, this metric requires the model and text as inputs.
+At minimum, this metric requires the model and data as inputs.
 ```python
->>> perplexity = evaluate.load("d-matrix/perplexity", module_type="metric")
+>>> import evaluate
+>>> perplexity = evaluate.load("d-matrix/perplexity", module_type="metric")
 >>> input_texts = ["lorem ipsum", "Happy Birthday!", "Bienvenue"]
->>> results = perplexity.compute(model='distilgpt2',text=input_texts)
->>> print(results)
-{'accuracy': 1.0}
-```
+>>> results = perplexity.compute(model='distilgpt2', data=input_texts)
+>>> print(list(results.keys()))
+['loss', 'perplexity']
+```
+
+### Inputs
+- **model** (`Union[str, AutoModelForCausalLM]`): the model used for calculating perplexity.
+- **data** (`list` of `str`): input text; each separate text snippet is one list entry.
+- **device** (`str`): device to run on; defaults to 'cuda' when available.
+- **max_length** (`int`): maximum sequence length; defaults to 2048.
+
+### Output Values
+- **loss** (`float`): the loss of the model predictions compared to the reference.
+- **perplexity** (`float`): the uncertainty of the model when predicting the text; lower perplexity indicates better model performance.
+
+Output Example(s):
+```python
+{'loss': 3.8299286365509033, 'perplexity': 46.05925369262695}
+```
+This metric outputs a dictionary containing the loss and the perplexity score.
+
+### Examples
+```python
+>>> import evaluate
+>>> from datasets import load_dataset
+>>> perplexity = evaluate.load("d-matrix/perplexity", module_type="metric")
+>>> input_texts = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")["text"][:10]
+>>> results = perplexity.compute(model='distilgpt2', data=input_texts)
+>>> print(list(results.keys()))
+['loss', 'perplexity']
+>>> print(results['loss'])
+3.8299286365509033
+>>> print(results['perplexity'])
+46.05925369262695
+```
+
+## Citation(s)
+https://huggingface.co/docs/transformers/perplexity
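Editor's note: since the updated README types **model** as `Union[str, AutoModelForCausalLM]`, a preloaded model object should be accepted in place of a checkpoint name. A minimal sketch of that usage, based only on the Inputs list above and not verified against this commit:

```python
>>> import evaluate
>>> from transformers import AutoModelForCausalLM
>>> perplexity = evaluate.load("d-matrix/perplexity", module_type="metric")
>>> model = AutoModelForCausalLM.from_pretrained("distilgpt2")
>>> results = perplexity.compute(model=model, data=["lorem ipsum", "Happy Birthday!"],
...                              device="cpu", max_length=1024)
>>> sorted(results.keys())
['loss', 'perplexity']
```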
perplexity.py CHANGED
@@ -20,7 +20,7 @@ Args:
         causal versions of t5, and more (the full list can be found
         in the AutoModelForCausalLM documentation here:
         https://huggingface.co/docs/transformers/master/en/model_doc/auto#transformers.AutoModelForCausalLM )
-    text (list of str): input text, each separate text snippet is one list entry.
+    data (list of str): input text, each separate text snippet is one list entry.
     device (str): device to run on, defaults to 'cuda' when available.
     max_length (int): maximum sequence length, defaults to 2048.
 Returns:
@@ -31,7 +31,7 @@ Examples:
     >>> perplexity = evaluate.load("dmx_perplexity", module_type="metric")
     >>> input_texts = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")["text"][:10] # doctest: +SKIP
     >>> results = perplexity.compute(model='distilgpt2',
-    ...                              text=input_texts)
+    ...                              data=input_texts)
     >>> print(list(results.keys()))
     ['loss', 'perplexity']
     >>> print(results['loss']) # doctest: +SKIP
@@ -40,8 +40,8 @@ Examples:
     46.05925369262695
 """
 
-
-class DmxPerplexity(evaluate.Metric):
+@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Perplexity(evaluate.Metric):
     def _info(self):
         return evaluate.MetricInfo(
             module_type="metric",
@@ -58,7 +58,7 @@ class DmxPerplexity(evaluate.Metric):
 
     def _compute(
         self,
-        text,
+        data,
         model: Union[str, AutoModelForCausalLM],
         device=None,
         max_length=None,
@@ -91,7 +91,7 @@ class DmxPerplexity(evaluate.Metric):
             max_seq_len = 2048
 
         model = model.to(device)
-        encodings = tokenizer("\n\n".join(text), return_tensors="pt")
+        encodings = tokenizer("\n\n".join(data), return_tensors="pt")
 
         stride = max_seq_len
        seq_len = encodings.input_ids.size(1)
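Editor's note: the `stride`/`seq_len` lines at the end of the last hunk set up the sliding-window evaluation described in the transformers perplexity guide that the README links. A minimal sketch of the loop they feed into, under the same assumptions (non-overlapping windows because `stride = max_seq_len`; names other than `stride` and `seq_len` are illustrative, not this repo's code):

```python
import torch

def _windowed_perplexity(model, encodings, device, max_seq_len):
    # Non-overlapping windows: the stride equals the maximum sequence length.
    stride = max_seq_len
    seq_len = encodings.input_ids.size(1)
    nlls = []
    for begin_loc in range(0, seq_len, stride):
        end_loc = min(begin_loc + max_seq_len, seq_len)
        input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
        with torch.no_grad():
            # Causal LMs shift the labels internally; `loss` is the mean
            # negative log-likelihood over this window's tokens.
            outputs = model(input_ids, labels=input_ids.clone())
        nlls.append(outputs.loss)
    loss = torch.stack(nlls).mean()
    # Perplexity is the exponentiated average negative log-likelihood.
    return {"loss": loss.item(), "perplexity": torch.exp(loss).item()}
```

This matches the output keys shown in the docstring's doctest (`['loss', 'perplexity']`).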
requirements.txt CHANGED
@@ -1,3 +1,4 @@
+git+https://github.com/huggingface/evaluate@main
 evaluate
 transformers
 torch