ThanaritKanjanametawat committed on
Commit
2bb8a76
‒
1 Parent(s): f1fd352

Change UI Options (1model, 3datasets) for Senior Project

Browse files
ClassifierCheckpoint/RobertaClassifierCHEAT256.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:330a021e57adfb3261f338f3372f8d22a5e60350b4f62ecabae113346ce46ca0
3
+ size 498675543
ClassifierCheckpoint/{RobertaClassifierCSAbstract.pth → RobertaClassifierGPABenchmark512.pth} RENAMED
File without changes
ClassifierCheckpoint/{RobertaClassifierOpenGPT.pth → RobertaClassifierOpenGPT512.pth} RENAMED
File without changes
ModelDriver.py CHANGED
@@ -60,9 +60,9 @@ def RobertaSentinelCSAbstractInference(input_text):
60
 
61
  def RobertaClassifierOpenGPTInference(input_text):
62
  tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
63
- model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT.pth"
64
  model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
65
- model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
66
  model = model.to(device)
67
  model.eval()
68
 
@@ -80,11 +80,11 @@ def RobertaClassifierOpenGPTInference(input_text):
80
  return Probs
81
 
82
 
83
- def RobertaClassifierCSAbstractInference(input_text):
84
  tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
85
- model_path = "ClassifierCheckpoint/RobertaClassifierCSAbstract.pth"
86
  model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
87
- model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
88
  model = model.to(device)
89
  model.eval()
90
 
@@ -101,5 +101,25 @@ def RobertaClassifierCSAbstractInference(input_text):
101
 
102
  return Probs
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
 
 
60
 
61
  def RobertaClassifierOpenGPTInference(input_text):
62
  tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
63
+ model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT512.pth"
64
  model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
65
+ model.load_state_dict(torch.load(model_path, map_location=device))
66
  model = model.to(device)
67
  model.eval()
68
 
 
80
  return Probs
81
 
82
 
83
+ def RobertaClassifierGPABenchmarkInference(input_text):
84
  tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
85
+ model_path = "ClassifierCheckpoint/RobertaClassifierGPABenchmark512.pth"
86
  model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
87
+ model.load_state_dict(torch.load(model_path, map_location=device))
88
  model = model.to(device)
89
  model.eval()
90
 
 
101
 
102
  return Probs
103
 
104
def RobertaClassifierCHEATInference(input_text):
    """Classify ``input_text`` with the RoBERTa classifier fine-tuned on CHEAT.

    A ``roberta-base`` sequence classifier with two labels is instantiated,
    its weights are replaced by the CHEAT checkpoint, and the text is scored
    in a single forward pass. The tokenizer, base model and checkpoint are
    loaded again on every call.

    Args:
        input_text: Text to classify. It is truncated to at most 256 tokens
            before being fed to the model.

    Returns:
        The softmax probabilities of the first (only) batch row as a NumPy
        array with one entry per label. The caller in ``app.py`` treats an
        argmax of 0 as "Human Written" and anything else as
        "Machine Generated".

    Note:
        Relies on a module-level ``device`` defined outside this function.
    """
    checkpoint_file = "ClassifierCheckpoint/RobertaClassifierCHEAT256.pth"

    # Build the tokenizer and a 2-label classification head on roberta-base,
    # then overwrite the weights with the fine-tuned checkpoint.
    roberta_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    classifier = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    state_dict = torch.load(checkpoint_file, map_location=device)
    classifier.load_state_dict(state_dict)
    classifier = classifier.to(device)
    classifier.eval()  # inference mode: disables dropout

    encoded = roberta_tokenizer(
        input_text,
        truncation=True,
        padding=True,
        max_length=256,
        return_tensors='pt',
    )

    # Forward pass without gradient tracking; only the logits are needed.
    with torch.no_grad():
        model_output = classifier(
            encoded['input_ids'].to(device),
            attention_mask=encoded['attention_mask'].to(device),
        )
        class_probabilities = F.softmax(model_output.logits, dim=1).cpu().numpy()[0]

    return class_probabilities
123
+
124
 
125
 
app.py CHANGED
@@ -5,39 +5,43 @@ import numpy as np
5
 
6
  # Add a title
7
  st.title('GPT Detection Demo')
8
- st.write("This is a demo for GPT detection. You can use this demo to test the model. The model is trained on two datasets: OpenGPT and CSAbstract. You can choose the model and dataset in the sidebar.")
9
- st.write("Reference on how we built Roberta Sentinel: https://arxiv.org/abs/2305.07969")
10
 
11
  # Add 4 options for 4 models
12
  ModelOption = st.sidebar.selectbox(
13
  'Which Model do you want to use?',
14
- ('RobertaSentinel', 'RobertaClassifier'),
15
  )
16
 
17
  DatasetOption = st.sidebar.selectbox(
18
  'Which Dataset the model was trained on?',
19
- ('OpenGPT', 'CSAbstract'),
20
  )
21
 
22
 
23
- text = st.text_area('Enter text here (max 500 words)', '')
24
 
25
  if st.button('Generate'):
26
- if ModelOption == 'RobertaSentinel':
27
- if DatasetOption == 'OpenGPT':
28
- result = RobertaSentinelOpenGPTInference(text)
29
- st.write("Model: RobertaSentinelOpenGPT")
30
- elif DatasetOption == 'CSAbstract':
31
- result = RobertaSentinelCSAbstractInference(text)
32
- st.write("Model: RobertaSentinelCSAbstract")
33
-
34
- elif ModelOption == 'RobertaClassifier':
35
  if DatasetOption == 'OpenGPT':
36
  result = RobertaClassifierOpenGPTInference(text)
37
  st.write("Model: RobertaClassifierOpenGPT")
38
- elif DatasetOption == 'CSAbstract':
39
- result = RobertaClassifierCSAbstractInference(text)
40
- st.write("Model: RobertaClassifierCSAbstract")
 
 
 
 
41
 
42
  Prediction = "Human Written" if not np.argmax(result) else "Machine Generated"
43
 
 
5
 
6
  # Add a title
7
  st.title('GPT Detection Demo')
8
+ st.write("This is a demo for GPT detection. You can use this demo to test the model. There are 3 variations of the model, The model was trained on CHEAT, GPABenchmark, OpenGPT datasets. They are all in the domain of Scientific Abstract. You can choose dataset variation of the model on the sidebar.")
9
+ # st.write("Reference on how we built Roberta Sentinel: https://arxiv.org/abs/2305.07969")
10
 
11
  # Add 4 options for 4 models
12
  ModelOption = st.sidebar.selectbox(
13
  'Which Model do you want to use?',
14
+ ('RobertaClassifier'),
15
  )
16
 
17
  DatasetOption = st.sidebar.selectbox(
18
  'Which Dataset the model was trained on?',
19
+ ('OpenGPT', 'GPABenchmark', 'CHEAT'),
20
  )
21
 
22
 
23
+ text = st.text_area('Enter text here (max 512 words)', '')
24
 
25
  if st.button('Generate'):
26
+ # if ModelOption == 'RobertaSentinel':
27
+ # if DatasetOption == 'OpenGPT':
28
+ # result = RobertaSentinelOpenGPTInference(text)
29
+ # st.write("Model: RobertaSentinelOpenGPT")
30
+ # elif DatasetOption == 'CSAbstract':
31
+ # result = RobertaSentinelCSAbstractInference(text)
32
+ # st.write("Model: RobertaSentinelCSAbstract")
33
+
34
+ if ModelOption == 'RobertaClassifier':
35
  if DatasetOption == 'OpenGPT':
36
  result = RobertaClassifierOpenGPTInference(text)
37
  st.write("Model: RobertaClassifierOpenGPT")
38
+ elif DatasetOption == 'GPABenchmark':
39
+ result = RobertaClassifierGPABenchmarkInference(text)
40
+ st.write("Model: RobertaClassifierGPABenchmark")
41
+ elif DatasetOption == 'CHEAT':
42
+ result = RobertaClassifierCHEATInference(text)
43
+ st.write("Model: RobertaClassifierCHEAT")
44
+
45
 
46
  Prediction = "Human Written" if not np.argmax(result) else "Machine Generated"
47