jwkirchenbauer committed · Commit 29d8de2 · 1 Parent(s): cb2cffc

Gradio details panels, docstrings

- demo_watermark.py (+95 −21)
- requirements.txt (+1 −0)
demo_watermark.py
CHANGED
@@ -33,6 +33,7 @@ from transformers import (AutoTokenizer,
 from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
 
 def str2bool(v):
+    """Util function for user friendly boolean flag args"""
     if isinstance(v, bool):
         return v
     if v.lower() in ('yes', 'true', 't', 'y', '1'):
@@ -43,6 +44,7 @@ def str2bool(v):
         raise argparse.ArgumentTypeError('Boolean value expected.')
 
 def parse_args():
+    """Command line argument specification"""
 
     parser = argparse.ArgumentParser(description="A minimum working example of applying the watermark to any LLM that supports the huggingface 🤗 `generate` API")
 
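A minimal sketch of the kind of flag definitions this parser carries, implied by the `args` attributes used elsewhere in the diff (`model_name_or_path`, `gamma`); the defaults and the `--delta`/`--use_sampling` flag names are assumptions, not taken from the commit:

```python
# Hypothetical flag definitions continuing parse_args() above
parser.add_argument("--model_name_or_path", type=str, default="facebook/opt-1.3b",
                    help="HF hub id or local path of the model to watermark")
parser.add_argument("--gamma", type=float, default=0.25,
                    help="Fraction of the vocab in the greenlist at each step")
parser.add_argument("--delta", type=float, default=2.0,
                    help="Logit bias added to greenlist tokens")
parser.add_argument("--use_sampling", type=str2bool, default=True,
                    help="Multinomial sampling vs greedy decoding")
```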
@@ -164,6 +166,8 @@ def parse_args():
     return args
 
 def load_model(args):
+    """Load and return the model and tokenizer"""
+
     args.is_seq2seq_model = any([(model_type in args.model_name_or_path) for model_type in ["t5","T0"]])
     args.is_decoder_only_model = any([(model_type in args.model_name_or_path) for model_type in ["gpt","opt","bloom"]])
     if args.is_seq2seq_model:
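The substring dispatch above routes to different auto-classes; a minimal sketch under the assumption that seq2seq and decoder-only models load via `AutoModelForSeq2SeqLM` and `AutoModelForCausalLM` respectively (the actual loading lines are elided from this hunk):

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM

def load_model_sketch(model_name_or_path: str):
    # Mirror the substring flags set in load_model above
    is_seq2seq = any(t in model_name_or_path for t in ["t5", "T0"])
    is_decoder_only = any(t in model_name_or_path for t in ["gpt", "opt", "bloom"])
    if is_seq2seq:
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
    elif is_decoder_only:
        model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
    else:
        raise ValueError(f"Unknown model type: {model_name_or_path}")
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    return model, tokenizer
```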
@@ -185,7 +189,10 @@ def load_model(args):
     return model, tokenizer, device
 
 def generate(prompt, args, model=None, device=None, tokenizer=None):
-
+    """Instantiate the WatermarkLogitsProcessor according to the watermark parameters
+    and generate watermarked text by passing it to the generate method of the model
+    as a logits processor."""
+
     print(f"Generating with {args}")
 
     watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
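For orientation, a minimal sketch of how such a processor plugs into the 🤗 `generate` call. `LogitsProcessorList` and the `logits_processor` kwarg are standard transformers API; the `gamma`/`delta` kwargs and their values are assumptions based on the watermark parameters described later in this commit, and `model`/`tokenizer`/`device`/`prompt` come from the surrounding functions:

```python
from transformers import LogitsProcessorList

watermark_processor = WatermarkLogitsProcessor(
    vocab=list(tokenizer.get_vocab().values()),
    gamma=0.25,   # greenlist fraction (assumed value)
    delta=2.0,    # logit bias added to greenlist tokens (assumed value)
)

tokd_input = tokenizer(prompt, return_tensors="pt").to(device)
output = model.generate(
    **tokd_input,
    max_new_tokens=200,
    do_sample=True,
    logits_processor=LogitsProcessorList([watermark_processor]),
)
text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
```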
@@ -251,25 +258,16 @@ def generate(prompt, args, model=None, device=None, tokenizer=None):
     #             decoded_output_with_watermark)
 
 def format_names(s):
+    """Format names for the gradio demo interface"""
     s=s.replace("num_tokens_scored","Tokens Counted (T)")
     s=s.replace("num_green_tokens","# Tokens in Greenlist")
     s=s.replace("green_fraction","Fraction of T in Greenlist")
     s=s.replace("z_score","z-score")
     s=s.replace("p_value","p value")
     return s
-
-    # output_str = f"@ z-score threshold={detection_threshold}:\n\n"
-    # for k,v in score_dict.items():
-    #     if k=='green_fraction':
-    #         output_str+=f"{format_names(k)}={v:.1%}"
-    #     elif k=='confidence':
-    #         output_str+=f"{format_names(k)}={v:.3%}"
-    #     elif isinstance(v, float):
-    #         output_str+=f"{format_names(k)}={v:.3g}"
-    #     else:
-    #         output_str += v
-    # return output_str
+
 def list_format_scores(score_dict, detection_threshold):
+    """Format the detection metrics into a gradio dataframe input format"""
     lst_2d = []
     lst_2d.append(["z-score threshold", f"{detection_threshold}"])
     for k,v in score_dict.items():
@@ -286,6 +284,8 @@ def list_format_scores(score_dict, detection_threshold):
     return lst_2d
 
 def detect(input_text, args, device=None, tokenizer=None):
+    """Instantiate the WatermarkDetector object and call detect on
+    the input text, returning the scores and outcome of the test"""
     watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
                                            gamma=args.gamma,
                                            seeding_scheme=args.seeding_scheme,
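A minimal usage sketch for the detector built above; the `detect` method follows the docstring ("call detect on the input text"), while `args.detection_z_threshold` is an assumed flag name not shown in this diff:

```python
# Run detection and render the metrics the way the demo table does
score_dict = watermark_detector.detect(input_text)
rows = list_format_scores(score_dict, detection_threshold=args.detection_z_threshold)
for name, value in rows:
    print(f"{name}: {value}")
```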
@@ -306,7 +306,7 @@ def detect(input_text, args, device=None, tokenizer=None):
     return output, args
 
 def run_gradio(args, model=None, device=None, tokenizer=None):
-
+    """Define and launch the gradio demo interface"""
     generate_partial = partial(generate, model=model, device=device, tokenizer=tokenizer)
     detect_partial = partial(detect, device=device, tokenizer=tokenizer)
 
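For context, a sketch of how such partials are typically wired to gradio events, so the callbacks only receive UI inputs while `model`/`device`/`tokenizer` stay bound; all component names here are illustrative, not from this commit:

```python
import gradio as gr

with gr.Blocks() as demo:
    session_args = gr.State(args)           # per-session copy of the run args
    prompt_box = gr.Textbox(label="Prompt")
    output_box = gr.Textbox(label="Output")
    btn = gr.Button("Generate")
    # generate_partial already has model/device/tokenizer bound via partial,
    # so gradio only supplies the prompt text and the args state
    btn.click(fn=generate_partial, inputs=[prompt_box, session_args],
              outputs=[output_box])
```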
@@ -315,15 +315,38 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
         # Top section, greeting and instructions
         gr.Markdown("## 🔍 [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) 🔍")
         gr.Markdown("[jwkirchenbauer/lm-watermarking![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)")
+        gr.Markdown(f"Language model: {args.model_name_or_path}")
+        with gr.Accordion("Understanding the output metrics",open=False):
+            gr.Markdown(
+            """
+            - `z-score threshold` : The cutoff for the hypothesis test
+            - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
+                The first token is omitted in the simple, single token seeding scheme since there is no way to generate
+                a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
+                described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
+            - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
+            - `Fraction of T in Greenlist` : `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
+            - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
+                we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked.
+            - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
+                observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
+                If this is extremely _small_ we are confident that this many green tokens was not chosen by random chance.
+            - `prediction` : The outcome of the hypothesis test, i.e. whether the observed `z-score` was higher than the `z-score threshold`.
+            - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
+                the confidence of the detection based on the unlikeliness of this `z-score` observation.
+            """
+            )
 
         with gr.Accordion("A note on model capability",open=False):
             gr.Markdown(
                 """
-                The models that can be used in this demo are limited to those that are open source
-
-                Therefore,
+                The models that can be used in this demo are limited to those that are both open source and that fit on a single commodity GPU.
+                In particular, there aren't many models above a few billion parameters, and almost none trained using Instruction-finetuning and/or RLHF.
+                Therefore, in both its un-watermarked (normal) and watermarked states, the model is not generally able to respond well to the kinds of prompts that a 100B+ Instruction and RLHF tuned model such as ChatGPT, Claude, or Bard is.
 
-                We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
+                We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
+                Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
+                Longer prompts and stopping mid-sentence often help encourage more fluent, longer generations.
                 """
             )
 
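The metrics above describe a one-proportion z-test. A minimal sketch of the arithmetic (all counts hypothetical), using the paper's test statistic `z = (g - gamma*T) / sqrt(gamma*(1-gamma)*T)` where `g` is the observed greenlist count:

```python
from math import sqrt
from scipy.stats import norm  # scipy is already in requirements.txt

gamma = 0.25          # greenlist fraction
T = 198               # Tokens Counted (hypothetical)
g = 119               # # Tokens in Greenlist (hypothetical)

green_fraction = g / T
z = (g - gamma * T) / sqrt(gamma * (1 - gamma) * T)
p_value = norm.sf(z)  # one-sided tail probability under the null hypothesis

threshold = 4.0
prediction = z > threshold            # "Watermarked" if True
confidence = 1 - p_value if prediction else None
print(f"green_fraction={green_fraction:.1%}, z={z:.3g}, p={p_value:.3g}, "
      f"watermarked={prediction}")
```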
@@ -407,9 +430,59 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                     seed_separately = gr.Checkbox(label="Seed both generations separately", value=args.seed_separately)
                 with gr.Column(scale=1):
                     select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
-
+
+        with gr.Accordion("Understanding the settings",open=False):
+            gr.Markdown(
+            """
+            #### Generation Parameters:
+
+            - Decoding Method : We can generate tokens from the model using either multinomial sampling or greedy decoding.
+            - Sampling Temperature : If using multinomial sampling we can set the temperature of the sampling distribution.
+                0.0 is equivalent to greedy decoding, and 1.0 is the maximum amount of variability/entropy in the next token distribution.
+                0.7 strikes a nice balance, staying faithful to the model's estimate of top candidates while adding variety. Does not apply to greedy decoding.
+            - Generation Seed : The integer to pass to the torch random number generator before running generation. Makes the multinomial sampling strategy
+                outputs reproducible. Does not apply to greedy decoding.
+            - Number of Beams : When using greedy decoding, we can also set the number of beams to > 1 to enable beam search.
+                This is not implemented (and was excluded from the paper) for multinomial sampling, but may be added in the future.
+            - Max Generated Tokens : The `max_new_tokens` parameter passed to the generation method to stop the output at a certain number of new tokens.
+                Note that the model is free to generate fewer tokens depending on the prompt.
+                Implicitly this sets the maximum number of prompt tokens possible as the model's maximum input length minus `max_new_tokens`,
+                and inputs will be truncated accordingly.
+
+            #### Watermark Parameters:
+
+            - gamma : The fraction of the vocabulary to be partitioned into the greenlist at each generation step.
+                Smaller gamma values create a stronger watermark by enabling the watermarked model to achieve
+                a greater differentiation from human/unwatermarked text, because it is preferentially sampling
+                from a smaller green set, making those tokens less likely to occur by chance.
+            - delta : The amount of positive bias to add to the logits of every token in the greenlist
+                at each generation step before sampling/choosing the next token. Higher delta values
+                mean that the greenlist tokens are more heavily preferred by the watermarked model,
+                and as the bias becomes very large the watermark transitions from "soft" to "hard".
+                For a hard watermark, nearly all tokens are green, but this can have a detrimental effect on
+                generation quality, especially when there is not a lot of flexibility in the distribution.
+            - z-score threshold : The z-score cutoff for the hypothesis test. Higher thresholds (such as 4.0) make
+                _false positives_ (predicting that human/unwatermarked text is watermarked) very unlikely,
+                as a genuine human text with a significant number of tokens will almost never achieve
+                that high of a z-score. Lower thresholds will capture more _true positives_, as some watermarked
+                texts will contain fewer green tokens, achieve a lower z-score, but still pass the lower bar and
+                be flagged as "watermarked". However, a lower threshold will increase the chance that human text
+                that contains a slightly higher than average number of green tokens is erroneously flagged.
+                4.0-5.0 offers extremely low false positive rates while still accurately catching most watermarked text.
+            - Ignore Bigram Repeats : This alternate detection algorithm only considers the unique bigrams in the text during detection,
+                computing the greenlists based on the first in each pair and checking whether the second falls within the list.
+                This means that `T` is now the number of unique bigrams in the text, which becomes less than the total
+                number of tokens generated if the text contains a lot of repetition. See the paper for a more detailed discussion.
+            - Normalizations : We implement a few basic normalizations to defend against various adversarial perturbations of the
+                text analyzed during detection. Currently we support converting all characters to unicode,
+                replacing homoglyphs with a canonical form, and standardizing the capitalization.
+                See the paper for a detailed discussion of input normalization.
+            """
+            )
+
         gr.HTML("""
-                <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
+                <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
+                Follow the github link at the top and host the demo on your own GPU hardware to test out larger models.
                 <br/>
                 <a href="https://huggingface.co/spaces/tomg-group-umd/lm-watermarking?duplicate=true">
                 <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
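To make the `gamma`/`delta` mechanics concrete, a minimal sketch of one decoding step of the soft watermark; the single-token seeding via `manual_seed` is illustrative (the repo's actual hashing/seeding scheme may differ):

```python
import torch

def soft_watermark_step(logits: torch.Tensor, prev_token: int,
                        gamma: float = 0.25, delta: float = 2.0) -> torch.Tensor:
    """One decoding step: bias the logits of a pseudorandom greenlist by delta.
    The greenlist is a gamma-fraction of the vocab, seeded on the prefix token."""
    vocab_size = logits.shape[-1]
    rng = torch.Generator().manual_seed(prev_token)     # illustrative seeding
    perm = torch.randperm(vocab_size, generator=rng)
    greenlist = perm[: int(gamma * vocab_size)]
    biased = logits.clone()
    biased[greenlist] += delta  # larger delta => greener output => "harder" watermark
    return biased

# Hypothetical single step:
logits = torch.randn(50_257)                            # fake next-token logits
probs = torch.softmax(soft_watermark_step(logits, prev_token=42), dim=-1)
next_token = torch.multinomial(probs, 1)                # multinomial sampling decode
```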
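And a small sketch of how the "Ignore Bigram Repeats" counting collapses `T` to unique bigrams (token ids hypothetical):

```python
# T under "Ignore Bigram Repeats": count unique (prefix, token) pairs only
token_ids = [5, 9, 5, 9, 5, 9, 7]           # hypothetical, highly repetitive output
bigrams = set(zip(token_ids[:-1], token_ids[1:]))
T = len(bigrams)                            # 3 unique bigrams vs 6 total pairs
print(bigrams, T)
# Each unique bigram is scored once: the greenlist is seeded on the first
# token and membership is checked for the second.
```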
@@ -506,7 +579,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
     demo.launch()
 
 def main(args):
-
+    """Run a command line version of the generation and detection operations
+    and optionally launch and serve the gradio demo"""
     # Initial arg processing and log
     args.normalizers = (args.normalizers.split(",") if args.normalizers else [])
     print(args)
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
+gradio
 nltk
 scipy
 torch