Upload 16 files
- detect-pretrain-code-contamination +0 -1
- detect-pretrain-code-contamination/README.md +17 -0
- detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-39.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/eval.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/eval.cpython-39.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/options.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/options.cpython-39.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/run.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/utils.cpython-311.pyc +0 -0
- detect-pretrain-code-contamination/src/__pycache__/utils.cpython-39.pyc +0 -0
- detect-pretrain-code-contamination/src/analyze.py +47 -0
- detect-pretrain-code-contamination/src/eval.py +178 -0
- detect-pretrain-code-contamination/src/options.py +23 -0
- detect-pretrain-code-contamination/src/run.py +230 -0
- detect-pretrain-code-contamination/src/scripts/run.sh +8 -0
- detect-pretrain-code-contamination/src/utils.py +28 -0
detect-pretrain-code-contamination
DELETED
@@ -1 +0,0 @@
-Subproject commit 616114e2334dc8dc8b7b538f6dbcc639cc42cb2c
detect-pretrain-code-contamination/README.md
ADDED
@@ -0,0 +1,17 @@
+# Detect-Pretrain-Code-Contamination
+
+This repository contains scripts for detecting pretraining code contamination in datasets.
+
+## Datasets
+You can specify the dataset for analysis. Example datasets include `truthful_qa` and `cais/mmlu`.
+
+## Usage
+Run the script with the desired models and dataset. Below are two examples of how to use the script with different models and the `truthful_qa` dataset.
+
+### Example 1:
+```bash
+DATASET=truthful_qa
+python src/run.py --target_model Fredithefish/ReasonixPajama-3B-HF --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4
+```
+
+The output of the script provides a metric for dataset contamination. If the share of examples with a result below 0.1 is greater than 0.85, it is highly likely that the model has been trained on the dataset.
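The README above mentions two examples, but only the first appears in this upload; the second invocation shipped in `src/scripts/run.sh` (added in this same commit) is presumably what an "Example 2" would look like:

```bash
DATASET=truthful_qa
python src/run.py --target_model togethercomputer/RedPajama-INCITE-Chat-3B-v1 --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4
```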
detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-311.pyc
ADDED
Binary file (2.16 kB).

detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-39.pyc
ADDED
Binary file (1.27 kB).

detect-pretrain-code-contamination/src/__pycache__/eval.cpython-311.pyc
ADDED
Binary file (9.99 kB).

detect-pretrain-code-contamination/src/__pycache__/eval.cpython-39.pyc
ADDED
Binary file (4.68 kB).

detect-pretrain-code-contamination/src/__pycache__/options.cpython-311.pyc
ADDED
Binary file (2.46 kB).

detect-pretrain-code-contamination/src/__pycache__/options.cpython-39.pyc
ADDED
Binary file (1.45 kB).

detect-pretrain-code-contamination/src/__pycache__/run.cpython-311.pyc
ADDED
Binary file (13.5 kB).

detect-pretrain-code-contamination/src/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (3.49 kB).

detect-pretrain-code-contamination/src/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (1.53 kB).
detect-pretrain-code-contamination/src/analyze.py
ADDED
@@ -0,0 +1,47 @@
+import json
+import statistics
+
+def load_jsonl(path):
+    with open(path) as f:
+        data = [json.loads(line) for line in f]
+    return data
+
+def analyze_data(data):
+    all_rmia = []
+    all_large_1 = []
+    for ex in data:
+        # Min_20.0% Prob
+        score = ex["pred"]["minkprob_w/_ref"]  # minkprob_w/_ref
+        all_rmia.append(score)
+        if score < 0.1:
+            all_large_1.append(score)
+    result = "result < 0.1, %: ", len(all_large_1) / len(all_rmia)
+    print(result)
+    return result
+    # print(f"RMIA mean: {statistics.mean(all_rmia)}")
+    # print(f"RMIA std: {statistics.stdev(all_rmia)}")
+    # print(f"RMIA min: {min(all_rmia)}")
+    # print(f"RMIA max: {max(all_rmia)}")
+    # # 25% percentile
+    # print(f"RMIA 25%: {statistics.quantiles(all_rmia)[0]}")
+    # # 50% percentile
+    # print(f"RMIA 50%: {statistics.quantiles(all_rmia)[1]}")
+    # # 75% percentile
+    # print(f"RMIA 75%: {statistics.quantiles(all_rmia)[2]}")
+
+
+
+
+if __name__ == "__main__":
+    print("contaminated model")
+    task = "ai2_arc"  # ai2_arc cais/mmlu truthful_qa
+    # /fsx-onellm/swj0419/attack/test_contamination/detect-pretrain-code/out/ai2_arc/Fredithefish/ReasonixPajama-3B-HF_togethercomputer/RedPajama-INCITE-Chat-3B-v1/input/all_output.jsonl
+    path = f"/fsx-onellm/swj0419/attack/test_contamination/detect-pretrain-code/out/{task}/Fredithefish/ReasonixPajama-3B-HF_huggyllama/llama-7b/input/all_output.jsonl"
+    data = load_jsonl(path)
+    analyze_data(data)
+
+    print("raw model")
+    path = f"/fsx-onellm/swj0419/attack/test_contamination/detect-pretrain-code/out/{task}/togethercomputer/RedPajama-INCITE-Chat-3B-v1_huggyllama/llama-7b/input/all_output.jsonl"
+    data = load_jsonl(path)
+    analyze_data(data)
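A minimal sketch of the record shape `analyze_data` expects (the `pred` dict is built per example in `src/run.py` below; the score values here are made up for illustration):

```python
from analyze import analyze_data

# Two fake records mimicking lines of all_output.jsonl; only
# pred["minkprob_w/_ref"] is read by analyze_data.
fake_output = [
    {"input": "Q1 ...", "pred": {"minkprob_w/_ref": 0.05}},  # below the 0.1 threshold
    {"input": "Q2 ...", "pred": {"minkprob_w/_ref": 0.42}},  # above the threshold
]
analyze_data(fake_output)  # prints ('result < 0.1, %: ', 0.5)
```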
detect-pretrain-code-contamination/src/eval.py
ADDED
@@ -0,0 +1,178 @@
+import logging
+logging.basicConfig(level='ERROR')
+import numpy as np
+from tqdm import tqdm
+import json
+from collections import defaultdict
+import matplotlib.pyplot as plt
+from sklearn.metrics import auc, roc_curve
+import matplotlib
+import random
+from ipdb import set_trace as bp
+import time
+
+matplotlib.rcParams['pdf.fonttype'] = 42
+matplotlib.rcParams['ps.fonttype'] = 42
+
+
+matplotlib.rcParams['pdf.fonttype'] = 42
+matplotlib.rcParams['ps.fonttype'] = 42
+
+# plot data
+def sweep(score, x):
+    """
+    Compute a ROC curve and then return the FPR, TPR, AUC, and ACC.
+    """
+    fpr, tpr, _ = roc_curve(x, -score)
+    acc = np.max(1 - (fpr + (1 - tpr)) / 2)
+    return fpr, tpr, auc(fpr, tpr), acc
+
+
+def do_plot(prediction, answers, sweep_fn=sweep, metric='auc', legend="", output_dir=None):
+    """
+    Generate the ROC curves by using ntest models as test models and the rest to train.
+    """
+    fpr, tpr, auc, acc = sweep_fn(np.array(prediction), np.array(answers, dtype=bool))
+
+    low = tpr[np.where(fpr < .05)[0][-1]]
+    # bp()
+    print('Attack %s AUC %.4f, Accuracy %.4f, TPR@5%%FPR of %.4f\n' % (legend, auc, acc, low))
+
+    metric_text = ''
+    if metric == 'auc':
+        metric_text = 'auc=%.3f' % auc
+    elif metric == 'acc':
+        metric_text = 'acc=%.3f' % acc
+
+    plt.plot(fpr, tpr, label=legend + metric_text)
+    return legend, auc, acc, low
+
+
+def fig_fpr_tpr(all_output, output_dir):
+    print("output_dir", output_dir)
+    answers = []
+    metric2predictions = defaultdict(list)
+    for ex in all_output:
+        answers.append(ex["label"])
+        for metric in ex["pred"].keys():
+            if ("raw" in metric) and ("clf" not in metric):
+                continue
+            metric2predictions[metric].append(ex["pred"][metric])
+
+    plt.figure(figsize=(4, 3))
+    with open(f"{output_dir}/auc.txt", "w") as f:
+        for metric, predictions in metric2predictions.items():
+            legend, auc, acc, low = do_plot(predictions, answers, legend=metric, metric='auc', output_dir=output_dir)
+            f.write('%s AUC %.4f, Accuracy %.4f, TPR@0.1%%FPR of %.4f\n' % (legend, auc, acc, low))
+
+    plt.semilogx()
+    plt.semilogy()
+    plt.xlim(1e-5, 1)
+    plt.ylim(1e-5, 1)
+    plt.xlabel("False Positive Rate")
+    plt.ylabel("True Positive Rate")
+    plt.plot([0, 1], [0, 1], ls='--', color='gray')
+    plt.subplots_adjust(bottom=.18, left=.18, top=.96, right=.96)
+    plt.legend(fontsize=8)
+    plt.savefig(f"{output_dir}/auc.png")
+
+
+def load_jsonl(input_path):
+    with open(input_path, 'r') as f:
+        data = [json.loads(line) for line in tqdm(f)]
+    random.seed(0)
+    random.shuffle(data)
+    return data
+
+def dump_jsonl(data, path):
+    with open(path, 'w') as f:
+        for line in tqdm(data):
+            f.write(json.dumps(line) + "\n")
+
+def read_jsonl(path):
+    with open(path, 'r') as f:
+        return [json.loads(line) for line in tqdm(f)]
+
+def convert_huggingface_data_to_list_dic(dataset):
+    all_data = []
+    for i in range(len(dataset)):
+        ex = dataset[i]
+        all_data.append(ex)
+    return all_data
+
+
+def process_truthful_qa(data):
+    new_data = []
+    for ex in data:
+        new_ex = {}
+        label = ex["mc2_targets"]["labels"].index(1)
+        output = ex["mc2_targets"]["choices"][label]
+        # We use mc2 instead of mc1, since that is what the Open LLM Leaderboard uses.
+        new_ex["output"] = output
+        new_ex["input"] = ex["question"] + " " + output
+        new_data.append(new_ex)
+    return new_data
+
+
+
+def process_mmlu(data):
+    new_data = []
+    for ex in data:
+        new_ex = {}
+        label = ex["choices"][ex["answer"]]
+        output = label
+        new_ex["output"] = output
+        new_ex["input"] = ex["question"] + " " + output
+        new_data.append(new_ex)
+    return new_data
+
+
+def process_arc(data):
+    new_data = []
+    choice2label = {"A": 0, "B": 1, "C": 2, "D": 3}
+    for ex in data:
+        new_ex = {}
+        # bp()
+        # print(ex["answerKey"])
+        if ex["answerKey"] not in choice2label:
+            continue
+        label = choice2label[ex["answerKey"]]
+        output = ex["choices"]["text"][label]
+        new_ex["output"] = output
+        new_ex["input"] = ex["question"] + " " + output
+        new_data.append(new_ex)
+    return new_data
+
+def process_gsm8k(data):
+    new_data = []
+    for ex in data:
+        new_ex = {}
+        output = ex["answer"]
+        new_ex["output"] = output
+        new_ex["input"] = ex["question"] + " " + output
+        new_data.append(new_ex)
+    return new_data
+
+def process_winogrande(data):
+    new_data = []
+    for ex in data:
+        new_ex = {}
+        label = int(ex["answer"])
+        output = ex[f"option{label}"]
+        new_ex["output"] = output
+        new_ex["input"] = ex["sentence"] + " " + output
+        new_data.append(new_ex)
+    return new_data
+# I'm not sure whether this is the correct format for winogrande, given how the dataset works.
+
+def process_hellaswag(data):
+    new_data = []
+    for ex in data:
+        new_ex = {}
+        label = int(ex["label"])  # For some reason the label is a str rather than an int.
+        output = ex["endings"][label]
+        new_ex["output"] = output
+        new_ex["input"] = ex["ctx"] + " " + output
+        new_data.append(new_ex)
+    return new_data
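Each `process_*` helper flattens a benchmark example into a dict with an `output` (the gold answer text) and an `input` (question plus answer). A small sketch using `process_truthful_qa` on a toy example that carries only the fields the function actually reads:

```python
from eval import process_truthful_qa

toy = [{
    "question": "What is the capital of France?",
    "mc2_targets": {
        "choices": ["Lyon", "Paris"],
        "labels": [0, 1],  # the index of the first 1 selects the gold choice
    },
}]
print(process_truthful_qa(toy))
# [{'output': 'Paris', 'input': 'What is the capital of France? Paris'}]
```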
detect-pretrain-code-contamination/src/options.py
ADDED
@@ -0,0 +1,23 @@
+import argparse
+import os
+from pathlib import Path
+import logging
+
+logger = logging.getLogger(__name__)
+
+class Options():
+    def __init__(self):
+        self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+        self.initialize_parser()
+
+    def initialize_parser(self):
+        self.parser.add_argument('--target_model', type=str, default="text-davinci-003", help="the model to attack: huggyllama/llama-65b, text-davinci-003")
+        self.parser.add_argument('--ref_model', type=str, default="huggyllama/llama-7b")
+        self.parser.add_argument('--output_dir', type=str, default="out")
+        self.parser.add_argument('--data', type=str, default="swj0419/WikiMIA", help="the dataset to evaluate: default is WikiMIA")
+        self.parser.add_argument('--length', type=int, default=64, help="the length of the input text to evaluate. Choose from 32, 64, 128, 256")
+        self.parser.add_argument('--key_name', type=str, default="input", help="the key name corresponding to the input text. Select from: input, paraphrase")
+        self.parser.add_argument('--ratio_gen', type=float, default=0.4)
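The `run.py` diff below defines `main(...)` with explicit parameters and no visible CLI entry point, while `run.sh` invokes it with the flags declared here. A hypothetical glue snippet (not part of this upload) wiring `Options` to `run.main` might look like this:

```python
# Hypothetical CLI glue: parse the flags defined in Options and forward them
# to run.main with the same parameter names. Not shown in this commit.
from options import Options
from run import main

if __name__ == "__main__":
    args = Options().parser.parse_args()
    main(args.target_model, args.ref_model, args.output_dir, args.data,
         args.length, args.key_name, args.ratio_gen)
```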
detect-pretrain-code-contamination/src/run.py
ADDED
@@ -0,0 +1,230 @@
+import logging
+logging.basicConfig(level='ERROR')
+import numpy as np
+from pathlib import Path
+import openai
+import torch
+import zlib
+import statistics
+from torch.utils.data import DataLoader
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from tqdm import tqdm
+import math
+import numpy as np
+from datasets import load_dataset
+from options import Options
+from ipdb import set_trace as bp
+from eval import *
+from utils import evaluate_model
+from analyze import analyze_data
+import argparse
+import os
+import sys
+import gc
+import pickle
+
+def save_data(filename, data):
+    with open(filename, 'wb') as filehandle:
+        # store the data as a binary data stream
+        pickle.dump(data, filehandle)
+
+def load_data(filename):
+    with open(filename, 'rb') as filehandle:
+        # read the data as a binary data stream
+        loaded_data = pickle.load(filehandle)
+
+    return loaded_data
+
+def unload_model(model, tokenizer):
+    model = model.cpu()
+    del model
+    del tokenizer
+    time.sleep(0.5)
+    gc.collect()
+    torch.cuda.empty_cache()
+
+def load_model(name1):
+    model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
+    model1.eval()
+    tokenizer1 = AutoTokenizer.from_pretrained(name1)
+
+    tokenizer1.pad_token = tokenizer1.eos_token
+    return model1, tokenizer1
+
+def calculatePerplexity(sentence, model, tokenizer, gpu):
+    """
+    exp(loss)
+    """
+    input_ids = torch.tensor(tokenizer.encode(sentence)).unsqueeze(0)
+    input_ids = input_ids.to(gpu)
+    with torch.no_grad():
+        outputs = model(input_ids, labels=input_ids)
+    loss, logits = outputs[:2]
+
+    '''
+    extract logits:
+    '''
+    # Apply log-softmax to the logits to get log-probabilities
+    probabilities = torch.nn.functional.log_softmax(logits, dim=-1)
+    # probabilities = torch.nn.functional.softmax(logits, dim=-1)
+    all_prob = []
+    input_ids_processed = input_ids[0][1:]
+
+    for i, token_id in enumerate(input_ids_processed):
+        probability = probabilities[0, i, token_id].item()
+        all_prob.append(probability)
+    return torch.exp(loss).item(), all_prob, loss.item()
+
+def sample_generation(sentence, model, tokenizer, args):
+    half_sentence_index = math.ceil(len(sentence.split()) * args['prefix_length'])
+
+    if half_sentence_index > 0:
+        prefix = " ".join(sentence.split()[:half_sentence_index])
+    else:
+        prefix = '<|startoftext|> '
+
+    input_ids = torch.tensor(tokenizer.encode(prefix)).unsqueeze(0)
+    input_ids = input_ids.to(model.device)
+
+    output = model.generate(input_ids, max_new_tokens=len(sentence.split())-half_sentence_index, min_new_tokens=1, num_return_sequences=args['num_z'], pad_token_id=tokenizer.eos_token_id, **args['generate_args'])
+    # print(output)
+    complete_generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)
+
+    return complete_generated_text
+
+
+def RMIA_1(text, target_loss, ref_loss, model1, tokenizer1, ratio_gen, neighbors_dl):
+    target_losses_z = evaluate_model(model1, tokenizer1, neighbors_dl)
+    result = torch.count_nonzero(target_losses_z < target_loss).item() / len(target_losses_z)
+    return result
+
+def get_neighbors(text, ref_loss, model2, tokenizer2, ratio_gen):
+    cur_args = {'prefix_length': ratio_gen, 'num_z': 100, 'generate_args': {'do_sample': True}}
+    neighbors = sample_generation(text, model2, tokenizer2, cur_args)
+    neighbors_dl = DataLoader(neighbors, batch_size=32, shuffle=False)
+    return neighbors_dl
+
+def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_name):
+    print(f"all data size: {len(test_data)}")
+    random.seed(0)
+    random.shuffle(test_data)
+    test_data = test_data[:100]
+
+    inference2_pass = None
+    neighbors_dls = None
+    ref_model_clean = ref_model.replace("/", "-")
+    data_name_clean = data_name.replace("/", "-")
+    os.makedirs(os.path.join(f"saves/{ref_model_clean}", f"{data_name_clean}"), exist_ok=True)
+    try:
+        inference2_pass = load_data(f'saves/{ref_model_clean}/{data_name_clean}/inference2_pass.txt')
+        neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
+    except:
+        ### MODEL 2 likelihoods
+        model2, tokenizer2 = load_model(ref_model)
+        inference2_pass = []  # 0: p_ref, 1: all_prob_ref, 2: p_ref_likelihood
+        for ex in tqdm(test_data):
+            text = ex[col_name]
+            new_ex = inference_model2(model2, tokenizer2, text)
+            inference2_pass.append(new_ex)
+        # Invariant: doesn't depend on model1, so this is safe to cache.
+
+        ### Neighbors:
+        neighbors_dls = []
+        counter = 0
+        for ex in tqdm(test_data):
+            text = ex[col_name]
+            new_ex = get_neighbors(text, inference2_pass[counter][2], model2, tokenizer2, ratio_gen)
+            counter = counter + 1
+            neighbors_dls.append(new_ex)
+        unload_model(model2, tokenizer2)
+        # Because generation samples with temperature it is not invariant; however, caching a snapshot in time should be just fine.
+        save_data(f'saves/{ref_model_clean}/{data_name_clean}/inference2_pass.txt', inference2_pass)
+        save_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt', neighbors_dls)
+        print("Saved ref data, exiting.")
+
+    ### MODEL 1 likelihoods
+    model1, tokenizer1 = load_model(target_model)
+    inference1_pass = []  # 0: p1, 1: all_prob, 2: p1_likelihood, 3: p_lower, 4: p_lower_likelihood
+    for ex in tqdm(test_data):
+        text = ex[col_name]
+        new_ex = inference_model1(model1, tokenizer1, text)
+        inference1_pass.append(new_ex)
+
+    ### RMIA results
+    model1, tokenizer1 = load_model(target_model)
+    counter = 0
+    results = []
+    for ex in tqdm(test_data):
+        text = ex[col_name]
+        new_ex = RMIA_1(text, inference1_pass[counter][2], inference2_pass[counter][2], model1, tokenizer1, ratio_gen, neighbors_dls[counter])
+        counter = counter + 1
+        results.append(new_ex)
+    unload_model(model1, tokenizer1)
+
+    ### Inference ex
+    all_output = []
+    counter = 0
+    for ex in tqdm(test_data):
+        text = ex[col_name]
+        pred = {}
+        pred["minkprob_w/_ref"] = results[counter]
+        pred["ppl"] = inference1_pass[counter][0]
+        pred["ppl/Ref_ppl (calibrate PPL to the reference model)"] = inference1_pass[counter][2] - inference2_pass[counter][2]
+        pred["ppl/lowercase_ppl"] = -(np.log(inference1_pass[counter][3]) / np.log(inference1_pass[counter][0])).item()
+        zlib_entropy = len(zlib.compress(bytes(text, 'utf-8')))
+        pred["ppl/zlib"] = np.log(inference1_pass[counter][0]) / zlib_entropy
+        ex["pred"] = pred
+        counter = counter + 1
+        all_output.append(ex)
+    return all_output
+
+def inference_model1(model1, tokenizer1, text):
+    p1, all_prob, p1_likelihood = calculatePerplexity(text, model1, tokenizer1, gpu=model1.device)
+    p_lower, _, p_lower_likelihood = calculatePerplexity(text.lower(), model1, tokenizer1, gpu=model1.device)
+    return [p1, all_prob, p1_likelihood, p_lower, p_lower_likelihood]
+
+def inference_model2(model2, tokenizer2, text):
+    p_ref, all_prob_ref, p_ref_likelihood = calculatePerplexity(text, model2, tokenizer2, gpu=model2.device)
+    return [p_ref, all_prob_ref, p_ref_likelihood]
+
+def main(target_model, ref_model, output_dir, data, length, key_name, ratio_gen):
+    output_dir = f"{output_dir}/{target_model}_{ref_model}/{key_name}"
+    Path(output_dir).mkdir(parents=True, exist_ok=True)
+    # load model and data
+    data_name = data
+    if "jsonl" in data:
+        data = load_jsonl(f"{data}")
+    elif data == "truthful_qa":
+        # bp()
+        dataset = load_dataset(data, "multiple_choice", split="validation")
+        data = convert_huggingface_data_to_list_dic(dataset)
+        data = process_truthful_qa(data)
+    elif data == "cais/mmlu":
+        dataset = load_dataset(data, "all", split="test")
+        data = convert_huggingface_data_to_list_dic(dataset)
+        data = process_mmlu(data)
+    elif data == "ai2_arc":
+        dataset = load_dataset(data, "ARC-Challenge", split="test")
+        data = convert_huggingface_data_to_list_dic(dataset)
+        data = process_arc(data)
+    elif data == "gsm8k":
+        dataset = load_dataset(data, "main", split="test")
+        data = convert_huggingface_data_to_list_dic(dataset)
+        data = process_gsm8k(data)
+    elif data == "Rowan/hellaswag":
+        dataset = load_dataset(data, "default", split="validation")
+        # We use the validation split since labels for the test set are not available.
+        data = convert_huggingface_data_to_list_dic(dataset)
+        data = process_hellaswag(data)
+    elif data == "winogrande":
+        dataset = load_dataset(data, "winogrande_debiased", split="validation")
+        data = convert_huggingface_data_to_list_dic(dataset)
+        data = process_winogrande(data)
+
+    # model1, model2, tokenizer1, tokenizer2 = load_model(target_model, ref_model)
+
+    all_output = evaluate_data(data, key_name, target_model, ref_model, ratio_gen, data_name)
+    dump_jsonl(all_output, f"{output_dir}/all_output.jsonl")
+    return analyze_data(all_output)
+    # fig_fpr_tpr(all_output, output_dir)
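The per-example score produced by `RMIA_1` is the fraction of generated neighbours whose loss under the target model is lower than the loss of the original benchmark text; a memorized example tends to have an unusually low loss, so its score sits near 0, which is what the README's 0.1 threshold refers to. A toy illustration with made-up numbers:

```python
import torch

target_loss = 1.2                                      # loss of the original example under the target model
neighbor_losses = torch.tensor([1.5, 1.8, 1.1, 2.0])   # losses of sampled rewrites of the same example
score = torch.count_nonzero(neighbor_losses < target_loss).item() / len(neighbor_losses)
print(score)  # 0.25 -- analyze.py then reports the share of examples scoring below 0.1
```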
detect-pretrain-code-contamination/src/scripts/run.sh
ADDED
@@ -0,0 +1,8 @@
+
+DATASET=truthful_qa #cais/mmlu #truthful_qa
+python src/run.py --target_model Fredithefish/ReasonixPajama-3B-HF --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4
+
+
+# DATASET=cais/mmlu #cais/mmlu #truthful_qa
+DATASET=truthful_qa #cais/mmlu #truthful_qa
+python src/run.py --target_model togethercomputer/RedPajama-INCITE-Chat-3B-v1 --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4
detect-pretrain-code-contamination/src/utils.py
ADDED
@@ -0,0 +1,28 @@
+from tqdm import tqdm
+import torch
+from torch.nn import CrossEntropyLoss
+
+def evaluate_model(model, tokenizer, dl):
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    model = model.to(device)
+    losses = []
+    for batch in dl:
+        batch = tokenizer(batch, padding=True, return_tensors='pt', truncation=True, max_length=150)
+        # Copy the input ids as labels, masking padding positions with -100 so they are ignored by the loss
+        labels = torch.tensor([
+            [-100 if mask == 0 else token for mask, token in mask_and_tokens] for mask_and_tokens in [zip(masks, labels) for masks, labels in zip(batch['attention_mask'], batch['input_ids'])]
+        ])
+        batch['labels'] = labels
+        batch = {k: v.to(device) for k, v in batch.items()}
+
+        with torch.no_grad():
+            outputs = model(batch['input_ids'], attention_mask=batch['attention_mask'], labels=batch['labels'])
+        shift_logits = outputs.logits[..., :-1, :].contiguous()
+        shift_labels = batch['labels'][..., 1:].contiguous()
+        loss_fct = CrossEntropyLoss(reduction='none')
+        loss = loss_fct(shift_logits.transpose(1, 2), shift_labels)
+        num_tokens = torch.sum(shift_labels != -100, dim=1)
+        loss_sum = torch.sum(loss, dim=1)
+        loss = loss_sum / num_tokens
+        losses.append(loss)
+    losses = torch.cat(losses)
+    return losses