# GPT-Detection-Demo / ModelDriver.py

from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
import torch
import torch.nn as nn
import torch.nn.functional as F

# The demo runs everything on CPU.
device = torch.device("cpu")

class MLP(nn.Module):
    """Two-layer classification head applied to a 768-dim RoBERTa feature vector."""

    def __init__(self, input_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 2)
        self.gelu = nn.GELU()

    def forward(self, x):
        x = self.gelu(self.fc1(x))
        x = self.fc2(x)
        return x

def extract_features(text):
    """Encode text with roberta-base and return its 768-dim first-token embedding."""
    # Note: the tokenizer and backbone are reloaded on every call; that is slow,
    # but it keeps each inference function self-contained for the demo.
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model = RobertaModel.from_pretrained("roberta-base").to(device)
    tokenized_text = tokenizer.encode(text, truncation=True, max_length=512, return_tensors="pt")
    with torch.no_grad():
        outputs = model(tokenized_text.to(device))
    last_hidden_states = outputs.last_hidden_state
    # Use the hidden state of the first (<s>) token as the sequence representation.
    TClassification = last_hidden_states[:, 0, :].squeeze().cpu().numpy()
    return TClassification
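
# Illustrative sanity check (not part of the original file; the sample string is
# hypothetical). The extracted vector's length must match the MLP's input_dim:
#
#   vec = extract_features("A short sample sentence.")
#   assert vec.shape == (768,)
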
def RobertaSentinelOpenGPTInference(input_text):
    """Sentinel pipeline: frozen RoBERTa features fed to the OpenGPT MLP checkpoint."""
    features = extract_features(input_text)
    loaded_model = MLP(768).to(device)
    loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelOpenGPT.pth", map_location=device))
    loaded_model.eval()
    with torch.no_grad():
        inputs = torch.tensor(features).to(device)
        outputs = loaded_model(inputs.float())
        # outputs is a 1-D logit vector, so softmax over dim 0 gives the class probabilities.
        Probs = F.softmax(outputs, dim=0).cpu().numpy()
    return Probs

def RobertaSentinelCSAbstractInference(input_text):
    """Sentinel pipeline: frozen RoBERTa features fed to the CSAbstract MLP checkpoint."""
    features = extract_features(input_text)
    loaded_model = MLP(768).to(device)
    loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelCSAbstract.pth", map_location=device))
    loaded_model.eval()
    with torch.no_grad():
        inputs = torch.tensor(features).to(device)
        outputs = loaded_model(inputs.float())
        Probs = F.softmax(outputs, dim=0).cpu().numpy()
    return Probs

def RobertaClassifierOpenGPTInference(input_text):
    """Fine-tuned RoBERTa classifier loaded from the OpenGPT checkpoint (512-token limit)."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT512.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()
    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(device)
    attention_mask = tokenized_input['attention_mask'].to(device)
    # Make a prediction and convert the logits to a length-2 probability vector.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    return Probs

def RobertaClassifierGPABenchmarkInference(input_text):
    """Fine-tuned RoBERTa classifier loaded from the GPABenchmark checkpoint (512-token limit)."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierGPABenchmark512.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()
    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(device)
    attention_mask = tokenized_input['attention_mask'].to(device)
    # Make a prediction and convert the logits to a length-2 probability vector.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    return Probs

def RobertaClassifierCHEATInference(input_text):
    """Fine-tuned RoBERTa classifier loaded from the CHEAT checkpoint (256-token limit)."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierCHEAT256.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()
    # Note the shorter 256-token truncation here, matching the checkpoint name.
    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=256, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(device)
    attention_mask = tokenized_input['attention_mask'].to(device)
    # Make a prediction and convert the logits to a length-2 probability vector.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    return Probs
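
# Minimal usage sketch (not part of the original file): run one Sentinel model and
# one fine-tuned classifier on a hypothetical sample string. Assumes the checkpoint
# files referenced above exist at their relative paths.
if __name__ == "__main__":
    sample = "This paper proposes a novel method for detecting generated text."
    print("Sentinel (OpenGPT):", RobertaSentinelOpenGPTInference(sample))
    print("Classifier (OpenGPT):", RobertaClassifierOpenGPTInference(sample))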