ThanaritKanjanametawat committed on
Commit
bd0c703
•
1 Parent(s): 953bb32

Deploying all model and test files

Browse files
ModelDriver.py CHANGED
@@ -1,6 +1,8 @@
1
  from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
2
  import torch
3
  import torch.nn as nn
 
 
4
 
5
 
6
  device = torch.device("cpu")
@@ -28,27 +30,76 @@ def extract_features(text):
28
  def RobertaSentinelOpenGPTInference(input_text):
29
  features = extract_features(input_text)
30
  loaded_model = MLP(768).to(device)
31
- loaded_model.load_state_dict(torch.load("MLPDictStates/RobertaSentinelOpenGPT.pth", map_location=device))
32
 
33
  # Define the tokenizer and model for feature extraction
34
  with torch.no_grad():
35
  inputs = torch.tensor(features).to(device)
36
  outputs = loaded_model(inputs.float())
37
- _, predicted = torch.max(outputs, 1)
38
 
39
- return predicted.item()
 
 
40
 
41
  def RobertaSentinelCSAbstractInference(input_text):
42
  features = extract_features(input_text)
43
  loaded_model = MLP(768).to(device)
44
- loaded_model.load_state_dict(torch.load("MLPDictStates/RobertaSentinelCSAbstract.pth", map_location=device))
45
 
46
  # Define the tokenizer and model for feature extraction
47
  with torch.no_grad():
48
  inputs = torch.tensor(features).to(device)
49
  outputs = loaded_model(inputs.float())
50
- _, predicted = torch.max(outputs, 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- return predicted.item()
53
 
54
 
 
1
  from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
2
  import torch
3
  import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from torch.utils.data import TensorDataset, DataLoader
6
 
7
 
8
  device = torch.device("cpu")
 
30
  def RobertaSentinelOpenGPTInference(input_text):
31
  features = extract_features(input_text)
32
  loaded_model = MLP(768).to(device)
33
+ loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelOpenGPT.pth", map_location=device))
34
 
35
  # Define the tokenizer and model for feature extraction
36
  with torch.no_grad():
37
  inputs = torch.tensor(features).to(device)
38
  outputs = loaded_model(inputs.float())
39
+ _, predicted = torch.max(outputs, 0)
40
 
41
+ Probs = (F.softmax(outputs, dim=0).cpu().numpy())
42
+
43
+ return Probs
44
 
45
  def RobertaSentinelCSAbstractInference(input_text):
46
  features = extract_features(input_text)
47
  loaded_model = MLP(768).to(device)
48
+ loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelCSAbstract.pth", map_location=device))
49
 
50
  # Define the tokenizer and model for feature extraction
51
  with torch.no_grad():
52
  inputs = torch.tensor(features).to(device)
53
  outputs = loaded_model(inputs.float())
54
+ _, predicted = torch.max(outputs, 0)
55
+
56
+ Probs = (F.softmax(outputs, dim=0).cpu().numpy())
57
+
58
+ return Probs
59
+
60
+
61
def RobertaClassifierOpenGPTInference(input_text):
    """Score ``input_text`` with the fine-tuned RoBERTa classifier (OpenGPT).

    Loads the ``roberta-base`` tokenizer and a two-label
    ``RobertaForSequenceClassification`` model, restores the weights from
    ``ClassifierCheckpoint/RobertaClassifierOpenGPT.pth`` and runs one forward
    pass on CPU.

    Args:
        input_text: The text to classify; it is truncated to 512 tokens.

    Returns:
        A NumPy array with the softmax probabilities of the two labels for the
        first (only) sequence in the batch. Callers in this project treat
        index 0 as "Human Written" and index 1 as "Machine Generated".
    """
    cpu = torch.device('cpu')

    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    # map_location keeps loading from failing on a CPU-only host when the
    # checkpoint was saved from a CUDA device (same as the Sentinel loaders).
    model.load_state_dict(torch.load(model_path, map_location=cpu))
    model = model.to(cpu)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(cpu)
    attention_mask = tokenized_input['attention_mask'].to(cpu)

    # Make a prediction
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        # logits has one row per input sequence; take the single row.
        Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs
81
+
82
+
83
def RobertaClassifierCSAbstractInference(input_text):
    """Score ``input_text`` with the fine-tuned RoBERTa classifier (CSAbstract).

    Loads the ``roberta-base`` tokenizer and a two-label
    ``RobertaForSequenceClassification`` model, restores the weights from
    ``ClassifierCheckpoint/RobertaClassifierCSAbstract.pth`` and runs one
    forward pass on CPU.

    Args:
        input_text: The text to classify; it is truncated to 512 tokens.

    Returns:
        A NumPy array with the softmax probabilities of the two labels for the
        first (only) sequence in the batch. Callers in this project treat
        index 0 as "Human Written" and index 1 as "Machine Generated".
    """
    cpu = torch.device('cpu')

    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierCSAbstract.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    # map_location keeps loading from failing on a CPU-only host when the
    # checkpoint was saved from a CUDA device (same as the Sentinel loaders).
    model.load_state_dict(torch.load(model_path, map_location=cpu))
    model = model.to(cpu)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(cpu)
    attention_mask = tokenized_input['attention_mask'].to(cpu)

    # Make a prediction
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        # logits has one row per input sequence; take the single row.
        Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs
103
 
 
104
 
105
 
{MLPDictStates → SentinelCheckpoint}/RobertaSentinelCSAbstract.pth RENAMED
File without changes
{MLPDictStates → SentinelCheckpoint}/RobertaSentinelOpenGPT.pth RENAMED
File without changes
Test.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Manual smoke test: run one detector on a fixed sentence and print its verdict.
import warnings

import numpy as np

from ModelDriver import (
    RobertaClassifierCSAbstractInference,
    RobertaClassifierOpenGPTInference,
    RobertaSentinelCSAbstractInference,
    RobertaSentinelOpenGPTInference,
)

# Silence library warnings so only the prediction output is shown.
warnings.filterwarnings("ignore")

Input_Text = "I want to do this data"

# Uncomment one of the sections below to exercise a Sentinel model instead.

# print("RobertaSentinelOpenGPTInference")
# Probs = RobertaSentinelOpenGPTInference(Input_Text)
# Pred = "Human Written" if not np.argmax(Probs) else "Machine Generated"
#
# print(f"Prediction: {Pred} ")
# print(f"Confidence:", max(Probs))

# print("RobertaSentinelCSAbstractInference")
# Probs = RobertaSentinelCSAbstractInference(Input_Text)
# Pred = "Human Written" if not np.argmax(Probs) else "Machine Generated"
#
# print(f"Prediction: {Pred} ")
# print(f"Confidence:", max(Probs))

# The banner names the function that is actually called on the next line.
print("RobertaClassifierOpenGPTInference")
Probs = RobertaClassifierOpenGPTInference(Input_Text)
# Index 0 is the "human" class; any other argmax means machine generated.
Pred = "Human Written" if not np.argmax(Probs) else "Machine Generated"

print(Probs)
print(f"Prediction: {Pred} ")
print("Confidence:", max(Probs))
app.py CHANGED
@@ -1,24 +1,42 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
- from ModelDriver import RobertaSentinelOpenGPTInference, RobertaSentinelCSAbstractInference
 
4
 
5
  # Add a title
6
  st.title('GPT Detection Demo')
7
 
8
  # Add 4 options for 4 models
9
- option = st.sidebar.selectbox(
10
  'Which Model do you want to use?',
11
- ('RobertaSentinelOpenGPT', 'RobertaSentinelCSAbstract'),
 
 
 
 
 
12
  )
13
 
14
 
15
  text = st.text_area('Enter text here', '')
16
 
17
  if st.button('Generate'):
18
- if option == 'RobertaSentinelOpenGPT':
19
- result = RobertaSentinelOpenGPTInference(text)
20
- elif option == 'RobertaSentinelCSAbstract':
21
- result = RobertaSentinelCSAbstractInference(text)
 
 
 
 
 
 
 
 
 
 
 
 
22
  st.write(result)
23
 
24
 
 
1
  import streamlit as st
2
  from transformers import pipeline
3
+ from ModelDriver import *
4
+ import numpy as np
5
 
6
  # Add a title
7
  st.title('GPT Detection Demo')
8
 
9
  # Add 4 options for 4 models
10
+ ModelOption = st.sidebar.selectbox(
11
  'Which Model do you want to use?',
12
+ ('RobertaSentinel', 'RobertaClassifier'),
13
+ )
14
+
15
+ DatasetOption = st.sidebar.selectbox(
16
+ 'Which Dataset do you want to use?',
17
+ ('OpenGPT', 'CSAbstract'),
18
  )
19
 
20
 
21
  text = st.text_area('Enter text here', '')
22
 
23
  if st.button('Generate'):
24
+ if ModelOption == 'RobertaSentinel':
25
+ if DatasetOption == 'OpenGPT':
26
+ result = RobertaSentinelOpenGPTInference(text)
27
+ elif DatasetOption == 'CSAbstract':
28
+ result = RobertaSentinelCSAbstractInference(text)
29
+
30
+ elif ModelOption == 'RobertaClassifier':
31
+ if DatasetOption == 'OpenGPT':
32
+ result = RobertaClassifierOpenGPTInference(text)
33
+ elif DatasetOption == 'CSAbstract':
34
+ result = RobertaClassifierCSAbstractInference(text)
35
+
36
+ Prediction = "Human Written" if not np.argmax(result) else "Machine Generated"
37
+
38
+ print(f"Prediction: {Prediction} ")
39
+ print(f"Probabilty:", max(result))
40
  st.write(result)
41
 
42