Spaces:

Thanarit
/

GPT-Detection-Demo

Sleeping

App Files Files Community

ThanaritKanjanametawat committed on Sep 26, 2023

Commit

bd0c703

•

1 Parent(s): 953bb32

Deploying all model and test files

Browse files

Files changed (5) hide show

ModelDriver.py +57 -6
{MLPDictStates → SentinelCheckpoint}/RobertaSentinelCSAbstract.pth +0 -0
{MLPDictStates → SentinelCheckpoint}/RobertaSentinelOpenGPT.pth +0 -0
Test.py +28 -0
app.py +25 -7

ModelDriver.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
 import torch
 import torch.nn as nn
 device = torch.device("cpu")
@@ -28,27 +30,76 @@ def extract_features(text):
 def RobertaSentinelOpenGPTInference(input_text):
     features = extract_features(input_text)
     loaded_model = MLP(768).to(device)
-    loaded_model.load_state_dict(torch.load("MLPDictStates/RobertaSentinelOpenGPT.pth", map_location=device))
     # Define the tokenizer and model for feature extraction
     with torch.no_grad():
         inputs = torch.tensor(features).to(device)
         outputs = loaded_model(inputs.float())
-        _, predicted = torch.max(outputs, 1)
-    return predicted.item()
 def RobertaSentinelCSAbstractInference(input_text):
     features = extract_features(input_text)
     loaded_model = MLP(768).to(device)
-    loaded_model.load_state_dict(torch.load("MLPDictStates/RobertaSentinelCSAbstract.pth", map_location=device))
     # Define the tokenizer and model for feature extraction
     with torch.no_grad():
         inputs = torch.tensor(features).to(device)
         outputs = loaded_model(inputs.float())
-        _, predicted = torch.max(outputs, 1)
-    return predicted.item()

 from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import TensorDataset, DataLoader
 device = torch.device("cpu")
 def RobertaSentinelOpenGPTInference(input_text):
+    """Score *input_text* with the Sentinel MLP head loaded from the OpenGPT checkpoint.
+
+    Features come from ``extract_features`` and the head is ``MLP(768)``; both
+    are defined elsewhere in this module.
+
+    Returns:
+        A NumPy array holding the softmax (over dim 0) of the MLP output.
+        The callers in this commit treat ``argmax == 0`` as "Human Written"
+        and anything else as "Machine Generated".
+    """
     features = extract_features(input_text)
     loaded_model = MLP(768).to(device)
+    loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelOpenGPT.pth", map_location=device))
+    # Switch to inference mode so any dropout / batch-norm layers inside MLP
+    # behave deterministically (a no-op if MLP has no such layers).
+    loaded_model.eval()
     with torch.no_grad():
         inputs = torch.tensor(features).to(device)
         outputs = loaded_model(inputs.float())
+        # NOTE(review): softmax over dim 0 assumes `outputs` is a 1-D logit
+        # vector for a single text; confirm against extract_features / MLP.
+        Probs = F.softmax(outputs, dim=0).cpu().numpy()
+    return Probs
 def RobertaSentinelCSAbstractInference(input_text):
+    """Score *input_text* with the Sentinel MLP head loaded from the CSAbstract checkpoint.
+
+    Features come from ``extract_features`` and the head is ``MLP(768)``; both
+    are defined elsewhere in this module.
+
+    Returns:
+        A NumPy array holding the softmax (over dim 0) of the MLP output.
+        The callers in this commit treat ``argmax == 0`` as "Human Written"
+        and anything else as "Machine Generated".
+    """
     features = extract_features(input_text)
     loaded_model = MLP(768).to(device)
+    loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelCSAbstract.pth", map_location=device))
+    # Switch to inference mode so any dropout / batch-norm layers inside MLP
+    # behave deterministically (a no-op if MLP has no such layers).
+    loaded_model.eval()
     with torch.no_grad():
         inputs = torch.tensor(features).to(device)
         outputs = loaded_model(inputs.float())
+        # NOTE(review): softmax over dim 0 assumes `outputs` is a 1-D logit
+        # vector for a single text; confirm against extract_features / MLP.
+        Probs = F.softmax(outputs, dim=0).cpu().numpy()
+    return Probs
+def RobertaClassifierOpenGPTInference(input_text):
+    """Classify *input_text* with the RoBERTa sequence classifier (OpenGPT checkpoint).
+
+    Builds a ``roberta-base`` tokenizer and a two-label
+    ``RobertaForSequenceClassification`` model, loads the weights stored at
+    ``ClassifierCheckpoint/RobertaClassifierOpenGPT.pth`` and runs one
+    forward pass on the CPU.
+
+    Returns:
+        A NumPy array with the softmax of the two logits for the input (the
+        first row of the batch). The callers in this commit treat
+        ``argmax == 0`` as "Human Written" and anything else as
+        "Machine Generated".
+    """
+    cpu = torch.device('cpu')
+    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+    model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT.pth"
+    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
+    # map_location remaps tensors that were saved from a GPU run, so the
+    # checkpoint still loads on this CPU-only deployment.
+    model.load_state_dict(torch.load(model_path, map_location=cpu))
+    model = model.to(cpu)
+    model.eval()
+    # Inputs longer than 512 tokens are truncated to the model's maximum length.
+    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
+    input_ids = tokenized_input['input_ids'].to(cpu)
+    attention_mask = tokenized_input['attention_mask'].to(cpu)
+    # Make a prediction
+    with torch.no_grad():
+        outputs = model(input_ids, attention_mask=attention_mask)
+    logits = outputs.logits
+    # Softmax across the label dimension, then take the single batch row.
+    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]
+    return Probs
+def RobertaClassifierCSAbstractInference(input_text):
+    """Classify *input_text* with the RoBERTa sequence classifier (CSAbstract checkpoint).
+
+    Builds a ``roberta-base`` tokenizer and a two-label
+    ``RobertaForSequenceClassification`` model, loads the weights stored at
+    ``ClassifierCheckpoint/RobertaClassifierCSAbstract.pth`` and runs one
+    forward pass on the CPU.
+
+    Returns:
+        A NumPy array with the softmax of the two logits for the input (the
+        first row of the batch). The callers in this commit treat
+        ``argmax == 0`` as "Human Written" and anything else as
+        "Machine Generated".
+    """
+    cpu = torch.device('cpu')
+    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+    model_path = "ClassifierCheckpoint/RobertaClassifierCSAbstract.pth"
+    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
+    # map_location remaps tensors that were saved from a GPU run, so the
+    # checkpoint still loads on this CPU-only deployment.
+    model.load_state_dict(torch.load(model_path, map_location=cpu))
+    model = model.to(cpu)
+    model.eval()
+    # Inputs longer than 512 tokens are truncated to the model's maximum length.
+    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
+    input_ids = tokenized_input['input_ids'].to(cpu)
+    attention_mask = tokenized_input['attention_mask'].to(cpu)
+    # Make a prediction
+    with torch.no_grad():
+        outputs = model(input_ids, attention_mask=attention_mask)
+    logits = outputs.logits
+    # Softmax across the label dimension, then take the single batch row.
+    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]
+    return Probs

{MLPDictStates → SentinelCheckpoint}/RobertaSentinelCSAbstract.pth RENAMED Viewed

File without changes

{MLPDictStates → SentinelCheckpoint}/RobertaSentinelOpenGPT.pth RENAMED Viewed

File without changes

Test.py ADDED Viewed

	@@ -0,0 +1,28 @@

+# Manual smoke test: run one inference function from ModelDriver on a fixed
+# sentence and print the class probabilities, the predicted label and the
+# confidence. The commented-out sections exercise the Sentinel models.
+from ModelDriver import *
+import numpy as np
+import warnings
+warnings.filterwarnings("ignore")
+Input_Text = "I want to do this data"
+# print("RobertaSentinelOpenGPTInference")
+# Probs = RobertaSentinelOpenGPTInference(Input_Text)
+# Pred = "Human Written" if not np.argmax(Probs) else "Machine Generated"
+#
+# print(f"Prediction: {Pred} ")
+# print(f"Confidence:", max(Probs))
+# print("RobertaSentinelCSAbstractInference")
+# Probs = RobertaSentinelCSAbstractInference(Input_Text)
+# Pred = "Human Written" if not np.argmax(Probs) else "Machine Generated"
+#
+# print(f"Prediction: {Pred} ")
+# print(f"Confidence:", max(Probs))
+# The printed label must name the function that is actually called below.
+print("RobertaClassifierOpenGPTInference")
+Probs = RobertaClassifierOpenGPTInference(Input_Text)
+# Index 0 is reported as human-written, any other index as machine-generated.
+Pred = "Human Written" if not np.argmax(Probs) else "Machine Generated"
+print(Probs)
+print(f"Prediction: {Pred} ")
+print(f"Confidence:", max(Probs))

app.py CHANGED Viewed

@@ -1,24 +1,42 @@
 import streamlit as st
 from transformers import pipeline
-from ModelDriver import RobertaSentinelOpenGPTInference, RobertaSentinelCSAbstractInference
 # Add a title
 st.title('GPT Detection Demo')
 # Add 4 options for 4 models
-option = st.sidebar.selectbox(
     'Which Model do you want to use?',
-    ('RobertaSentinelOpenGPT', 'RobertaSentinelCSAbstract'),
 )
 text = st.text_area('Enter text here', '')
 if st.button('Generate'):
-    if option == 'RobertaSentinelOpenGPT':
-        result = RobertaSentinelOpenGPTInference(text)
-    elif option == 'RobertaSentinelCSAbstract':
-        result = RobertaSentinelCSAbstractInference(text)
     st.write(result)

 import streamlit as st
 from transformers import pipeline
+from ModelDriver import *
+import numpy as np
+# Streamlit front end: choose a model family and a dataset variant in the
+# sidebar, enter text, and show the class probabilities with the verdict.
 # Add a title
 st.title('GPT Detection Demo')
 # Add 4 options for 4 models
+ModelOption = st.sidebar.selectbox(
     'Which Model do you want to use?',
+    ('RobertaSentinel', 'RobertaClassifier'),
+)
+DatasetOption = st.sidebar.selectbox(
+    'Which Dataset do you want to use?',
+    ('OpenGPT', 'CSAbstract'),
 )
 text = st.text_area('Enter text here', '')
+# Maps every (model, dataset) combination offered in the sidebar to the
+# ModelDriver inference function that handles it.
+InferenceFunctions = {
+    ('RobertaSentinel', 'OpenGPT'): RobertaSentinelOpenGPTInference,
+    ('RobertaSentinel', 'CSAbstract'): RobertaSentinelCSAbstractInference,
+    ('RobertaClassifier', 'OpenGPT'): RobertaClassifierOpenGPTInference,
+    ('RobertaClassifier', 'CSAbstract'): RobertaClassifierCSAbstractInference,
+}
 if st.button('Generate'):
+    if not text.strip():
+        # Nothing to classify; do not send an empty string through the model.
+        st.warning('Please enter some text first.')
+    else:
+        result = InferenceFunctions[(ModelOption, DatasetOption)](text)
+        # Index 0 is reported as human-written, any other index as machine-generated.
+        Prediction = "Human Written" if not np.argmax(result) else "Machine Generated"
+        # Render in the page: print() only reaches the server console, so the
+        # user would never see the verdict.
+        st.write(f"Prediction: {Prediction}")
+        st.write(f"Probability: {max(result)}")
+        st.write(result)