chinnadhurai sankar committed on Apr 24, 2024

Commit

e190469

1 Parent(s): 75488ff

initial commit

Files changed (30) hide show

elm-0.25_toxicity_detection/added_tokens.json +3 -0
elm-0.25_toxicity_detection/ckpt.pt +3 -0
elm-0.25_toxicity_detection/config.yaml +1 -0
elm-0.25_toxicity_detection/example_prompts.json +12 -0
elm-0.25_toxicity_detection/merges.txt +0 -0
elm-0.25_toxicity_detection/slicex_elm_config.json +1 -0
elm-0.25_toxicity_detection/special_tokens_map.json +30 -0
elm-0.25_toxicity_detection/tokenizer.json +0 -0
elm-0.25_toxicity_detection/tokenizer_config.json +30 -0
elm-0.25_toxicity_detection/vocab.json +0 -0
elm-0.75_toxicity_detection/added_tokens.json +3 -0
elm-0.75_toxicity_detection/ckpt.pt +3 -0
elm-0.75_toxicity_detection/config.yaml +1 -0
elm-0.75_toxicity_detection/example_prompts.json +12 -0
elm-0.75_toxicity_detection/merges.txt +0 -0
elm-0.75_toxicity_detection/slicex_elm_config.json +1 -0
elm-0.75_toxicity_detection/special_tokens_map.json +30 -0
elm-0.75_toxicity_detection/tokenizer.json +0 -0
elm-0.75_toxicity_detection/tokenizer_config.json +30 -0
elm-0.75_toxicity_detection/vocab.json +0 -0
elm-1.0_toxicity_detection/added_tokens.json +3 -0
elm-1.0_toxicity_detection/ckpt.pt +3 -0
elm-1.0_toxicity_detection/config.yaml +1 -0
elm-1.0_toxicity_detection/example_prompts.json +12 -0
elm-1.0_toxicity_detection/merges.txt +0 -0
elm-1.0_toxicity_detection/slicex_elm_config.json +1 -0
elm-1.0_toxicity_detection/special_tokens_map.json +30 -0
elm-1.0_toxicity_detection/tokenizer.json +0 -0
elm-1.0_toxicity_detection/tokenizer_config.json +30 -0
elm-1.0_toxicity_detection/vocab.json +0 -0

elm-0.25_toxicity_detection/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[PAD]": 50257
+}

elm-0.25_toxicity_detection/ckpt.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81fe18e5d2dbdade0bd6555f97c19338d17d7e8a3d074c9779fa17db98979419
+size 840171202

elm-0.25_toxicity_detection/config.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ name_or_path: elm/rambutan

elm-0.25_toxicity_detection/example_prompts.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "inputs": [
+        "Hi.  You recently reverted List of villains back to a much earlier version.  As you can see on the talk page, there's been a fair amount of discussion about how best to organize the page, and the page is in transition from the earlier alphabetical format to being organized by medium.  If you think this is a bad idea, please feel free to discuss it on the talk page, rather than reverting without an edit summary.  Thank you. \u2014  \u2022",
+        "> \n From 2nd GA Review: \n * Citation clean-up. \n * Update 'Europa Series' section. \n * Update 'Specification' section. \n * Update 'Counterfeiting' section. \n * Reference watermarks in 'Security features' section.",
+        "F**k that s**t, comrade, no one cares about the high school, let alone some hillbilly backwater jazz band.",
+        "Dear Dr. Mereu, \n\n I am very much looking forward to this class. It is my first class at Rutgers! I think its extremely interesting and am very excited about it as I just decided that I want to minor in Psychology this year. I am especially interested in the neuroscience aspect of it all. Looking forward to a great semester!",
+        "You are one slow and persistent motherfucker, can you read?",
+        "==Consistency== \n\n Just for the sake of consistency with the project page guidelines, should the positions of the sections Analysis and criticism and Performance history be switched?",
+        "Arthur MacArthur IV  (Son)  Born February 21, 1938 == \n\n Why is General MacArthur's son with Jean Marie Faircloth MacArthur not mentioned as a relative of the General's?"
+    ],
+    "template": "[INST]You are a helpful, precise, detailed, and concise artificial intelligence assistant. You are a very intelligent and sensitive, having a keen ability to discern whether or not a text message is toxic. You can also be trusted with following the instructions given to you precisely, without deviations.\nIn this task, you are asked to decide whether or not comment text is toxic.\nToxic content harbors negativity towards a person or a group, for instance:\n - stereotyping (especially using negative stereotypes)\n - disparaging a person's gender -- as in \"male\", \"female\", \"men\", \"women\"\n - derogatory language or slurs\n - racism -- as in discriminating toward people who are \"black\", \"white\"\n - cultural appropriation\n - mockery or ridicule\n - sexual objectification\n - homophobia -- bullying people who are \"homosexual\", \"gay\", \"lesbian\"\n - historical insensitivity\n - disrespecting religion -- as in \"christian\", \"jewish\", \"muslim\"\n - saying that certain groups are less worthy of respect\n - insensitivity to health conditions -- as in \"psychiatric/mental illness\"\n\nRead the comment text provided and predict whether or not the comment text is toxic. If comment text is toxic according to the instructions, then the answer is \"yes\" (return \"yes\"); otherwise, the answer is \"no\" (return \"no\").\nOutput the answer only as a \"yes\" or a \"no\"; do not provide explanations.\nPlease, never return empty output; always return a \"yes\" or a \"no\" answer.\nYou will be evaluated based on the following criteria: - The generated answer is always \"yes\" or \"no\" (never the empty string, \"\"). - The generated answer is correct for the comment text presented to you.\n### Comment Text: {input}\n### Comment Text Is Toxic (Yes/No)  [/INST]"
+}

elm-0.25_toxicity_detection/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

elm-0.25_toxicity_detection/slicex_elm_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_name_or_path": "elm_rambutan"}

elm-0.25_toxicity_detection/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

elm-0.25_toxicity_detection/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

elm-0.25_toxicity_detection/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50257": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 1024,
+  "pad_token": "[PAD]",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

elm-0.25_toxicity_detection/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

elm-0.75_toxicity_detection/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[PAD]": 50257
+}

elm-0.75_toxicity_detection/ckpt.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:505ee87a70b47ab632b77349e8b5c89a1da92ae31c3c95de0b968fb1e047d206
+size 1696314274

elm-0.75_toxicity_detection/config.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ name_or_path: elm/rambutan

elm-0.75_toxicity_detection/example_prompts.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "inputs": [
+        "Hi.  You recently reverted List of villains back to a much earlier version.  As you can see on the talk page, there's been a fair amount of discussion about how best to organize the page, and the page is in transition from the earlier alphabetical format to being organized by medium.  If you think this is a bad idea, please feel free to discuss it on the talk page, rather than reverting without an edit summary.  Thank you. \u2014  \u2022",
+        "> \n From 2nd GA Review: \n * Citation clean-up. \n * Update 'Europa Series' section. \n * Update 'Specification' section. \n * Update 'Counterfeiting' section. \n * Reference watermarks in 'Security features' section.",
+        "F**k that s**t, comrade, no one cares about the high school, let alone some hillbilly backwater jazz band.",
+        "Dear Dr. Mereu, \n\n I am very much looking forward to this class. It is my first class at Rutgers! I think its extremely interesting and am very excited about it as I just decided that I want to minor in Psychology this year. I am especially interested in the neuroscience aspect of it all. Looking forward to a great semester!",
+        "You are one slow and persistent motherfucker, can you read?",
+        "==Consistency== \n\n Just for the sake of consistency with the project page guidelines, should the positions of the sections Analysis and criticism and Performance history be switched?",
+        "Arthur MacArthur IV  (Son)  Born February 21, 1938 == \n\n Why is General MacArthur's son with Jean Marie Faircloth MacArthur not mentioned as a relative of the General's?"
+    ],
+    "template": "[INST]You are a helpful, precise, detailed, and concise artificial intelligence assistant. You are a very intelligent and sensitive, having a keen ability to discern whether or not a text message is toxic. You can also be trusted with following the instructions given to you precisely, without deviations.\nIn this task, you are asked to decide whether or not comment text is toxic.\nToxic content harbors negativity towards a person or a group, for instance:\n - stereotyping (especially using negative stereotypes)\n - disparaging a person's gender -- as in \"male\", \"female\", \"men\", \"women\"\n - derogatory language or slurs\n - racism -- as in discriminating toward people who are \"black\", \"white\"\n - cultural appropriation\n - mockery or ridicule\n - sexual objectification\n - homophobia -- bullying people who are \"homosexual\", \"gay\", \"lesbian\"\n - historical insensitivity\n - disrespecting religion -- as in \"christian\", \"jewish\", \"muslim\"\n - saying that certain groups are less worthy of respect\n - insensitivity to health conditions -- as in \"psychiatric/mental illness\"\n\nRead the comment text provided and predict whether or not the comment text is toxic. If comment text is toxic according to the instructions, then the answer is \"yes\" (return \"yes\"); otherwise, the answer is \"no\" (return \"no\").\nOutput the answer only as a \"yes\" or a \"no\"; do not provide explanations.\nPlease, never return empty output; always return a \"yes\" or a \"no\" answer.\nYou will be evaluated based on the following criteria: - The generated answer is always \"yes\" or \"no\" (never the empty string, \"\"). - The generated answer is correct for the comment text presented to you.\n### Comment Text: {input}\n### Comment Text Is Toxic (Yes/No)  [/INST]"
+}

elm-0.75_toxicity_detection/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

elm-0.75_toxicity_detection/slicex_elm_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_name_or_path": "elm_rambutan"}

elm-0.75_toxicity_detection/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

elm-0.75_toxicity_detection/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

elm-0.75_toxicity_detection/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50257": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 1024,
+  "pad_token": "[PAD]",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

elm-0.75_toxicity_detection/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

elm-1.0_toxicity_detection/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[PAD]": 50257
+}

elm-1.0_toxicity_detection/ckpt.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a505bca6181c66afba307f97c9e5ce43ba97a8013ba1809afcbece2c40c36d0
+size 2124385874

elm-1.0_toxicity_detection/config.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ name_or_path: elm/rambutan

elm-1.0_toxicity_detection/example_prompts.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "inputs": [
+        "Hi.  You recently reverted List of villains back to a much earlier version.  As you can see on the talk page, there's been a fair amount of discussion about how best to organize the page, and the page is in transition from the earlier alphabetical format to being organized by medium.  If you think this is a bad idea, please feel free to discuss it on the talk page, rather than reverting without an edit summary.  Thank you. \u2014  \u2022",
+        "> \n From 2nd GA Review: \n * Citation clean-up. \n * Update 'Europa Series' section. \n * Update 'Specification' section. \n * Update 'Counterfeiting' section. \n * Reference watermarks in 'Security features' section.",
+        "F**k that s**t, comrade, no one cares about the high school, let alone some hillbilly backwater jazz band.",
+        "Dear Dr. Mereu, \n\n I am very much looking forward to this class. It is my first class at Rutgers! I think its extremely interesting and am very excited about it as I just decided that I want to minor in Psychology this year. I am especially interested in the neuroscience aspect of it all. Looking forward to a great semester!",
+        "You are one slow and persistent motherfucker, can you read?",
+        "==Consistency== \n\n Just for the sake of consistency with the project page guidelines, should the positions of the sections Analysis and criticism and Performance history be switched?",
+        "Arthur MacArthur IV  (Son)  Born February 21, 1938 == \n\n Why is General MacArthur's son with Jean Marie Faircloth MacArthur not mentioned as a relative of the General's?"
+    ],
+    "template": "[INST]You are a helpful, precise, detailed, and concise artificial intelligence assistant. You are a very intelligent and sensitive, having a keen ability to discern whether or not a text message is toxic. You can also be trusted with following the instructions given to you precisely, without deviations.\nIn this task, you are asked to decide whether or not comment text is toxic.\nToxic content harbors negativity towards a person or a group, for instance:\n - stereotyping (especially using negative stereotypes)\n - disparaging a person's gender -- as in \"male\", \"female\", \"men\", \"women\"\n - derogatory language or slurs\n - racism -- as in discriminating toward people who are \"black\", \"white\"\n - cultural appropriation\n - mockery or ridicule\n - sexual objectification\n - homophobia -- bullying people who are \"homosexual\", \"gay\", \"lesbian\"\n - historical insensitivity\n - disrespecting religion -- as in \"christian\", \"jewish\", \"muslim\"\n - saying that certain groups are less worthy of respect\n - insensitivity to health conditions -- as in \"psychiatric/mental illness\"\n\nRead the comment text provided and predict whether or not the comment text is toxic. If comment text is toxic according to the instructions, then the answer is \"yes\" (return \"yes\"); otherwise, the answer is \"no\" (return \"no\").\nOutput the answer only as a \"yes\" or a \"no\"; do not provide explanations.\nPlease, never return empty output; always return a \"yes\" or a \"no\" answer.\nYou will be evaluated based on the following criteria: - The generated answer is always \"yes\" or \"no\" (never the empty string, \"\"). - The generated answer is correct for the comment text presented to you.\n### Comment Text: {input}\n### Comment Text Is Toxic (Yes/No)  [/INST]"
+}

elm-1.0_toxicity_detection/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

elm-1.0_toxicity_detection/slicex_elm_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_name_or_path": "elm_rambutan"}

elm-1.0_toxicity_detection/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

elm-1.0_toxicity_detection/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

elm-1.0_toxicity_detection/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50257": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 1024,
+  "pad_token": "[PAD]",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

elm-1.0_toxicity_detection/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff