Spaces:

atyshka
/

ai-detector

Running

ai-detector / app.py

Alex Tyshka

Initial commit

cc5ee73 over 1 year ago

3.23 kB

	import pickle
	import torch
	import numpy as np
	import gradio as gr
	from nltk import word_tokenize, sent_tokenize
	from scipy.stats import shapiro
	from transformers import GPT2LMHeadModel, GPT2TokenizerFast

	# Load the GPT-2 Large language model and move it onto the GPU.
	model = GPT2LMHeadModel.from_pretrained('gpt2-large')
	model = model.to('cuda')
	# Matching fast tokenizer for the same checkpoint.
	tokenizer: GPT2TokenizerFast = GPT2TokenizerFast.from_pretrained('gpt2-large')

	# Unpickle the classifier that score_text() queries via predict_proba().
	# NOTE: pickle executes arbitrary code on load; model.pkl must be a trusted file.
	with open('model.pkl', 'rb') as classifier_file:
	    lr_model = pickle.load(classifier_file)

	def get_perplexity(text: str):
	    """Score ``text`` with the language model and build per-word highlight data.

	    The text is tokenized (with ``truncation=True``), run through ``model``
	    with the input ids as labels, and a per-token cross-entropy is computed
	    for every token after the first. Those per-token values are scaled by
	    their maximum so the largest becomes 1.0.

	    Args:
	        text: The text to score.

	    Returns:
	        A ``(loss, words)`` pair. ``loss`` is ``outputs.loss`` as returned by
	        the model (a tensor; it is not exponentiated here). ``words`` is a
	        list of ``(substring, score)`` tuples: the first word always gets
	        score ``0``, and every later word gets the scaled cross-entropy of
	        its first token.

	    Raises:
	        ValueError: If ``text`` tokenizes to fewer than two tokens, since
	            there is then no next-token prediction to score.
	    """
	    tokens = tokenizer(text, return_tensors='pt', truncation=True)
	    # Follow the model's own placement instead of hard-coding a device name.
	    inputs = tokens.input_ids.to(model.device)
	    if inputs.shape[-1] < 2:
	        raise ValueError('text must contain at least two tokens to compute perplexity')

	    labels = inputs.clone()
	    with torch.no_grad():
	        outputs = model(inputs, labels=labels)

	    logits = outputs.logits
	    labels = labels.to(logits.device)
	    # Shift so that tokens < n predict n
	    shift_logits = logits[..., :-1, :].contiguous()
	    shift_labels = labels[..., 1:].contiguous()
	    # reduction='none' keeps one loss value per predicted token.
	    token_losses = torch.nn.functional.cross_entropy(
	        shift_logits.view(-1, shift_logits.size(-1)),
	        shift_labels.view(-1),
	        reduction='none',
	    )

	    # Scale into [0, 1] relative to the largest per-token loss.
	    token_losses = token_losses.to('cpu').numpy()
	    peak = np.max(token_losses)
	    if peak > 0:
	        token_losses = token_losses / peak
	    scaled_losses = token_losses.tolist()

	    # The first word has no preceding context to be predicted from, so it
	    # is emitted with a score of 0.
	    first_span = tokens.word_to_chars(0)
	    output = [(text[:first_span[1]], 0)]
	    for word_id, p in zip(tokens.word_ids()[1:], scaled_losses):
	        # len(output) is the index of the next word not yet emitted, so only
	        # the first token of each new word contributes that word's score.
	        if word_id == len(output):
	            span = tokens.word_to_chars(word_id)
	            output.append((text[span[0]:span[1]], p))
	    return outputs.loss, output



	def score_text(text):
	    """Classify ``text`` as human- or AI-written.

	    Builds a four-value feature vector -- the loss returned by
	    ``get_perplexity``, the mean and standard deviation of the per-sentence
	    word counts, and the Shapiro-Wilk p-value of those counts (``0.5`` when
	    there are two sentences or fewer) -- and passes it to
	    ``lr_model.predict_proba``.

	    Returns:
	        A ``(label_scores, word_perplexities)`` pair: a dict with the keys
	        ``'Human'`` and ``'AI'`` holding the first and second predicted
	        probabilities, and the per-word list produced by ``get_perplexity``.
	    """
	    loss, highlighted_words = get_perplexity(text)

	    # Number of word tokens in each sentence of the text.
	    sentence_lengths = [len(word_tokenize(sentence)) for sentence in sent_tokenize(text)]

	    loss_value = loss.item()
	    mean_length = np.mean(sentence_lengths)
	    length_spread = np.std(sentence_lengths)
	    # The normality test is only run when there are more than two sentences.
	    if len(sentence_lengths) > 2:
	        normality_p = shapiro(sentence_lengths).pvalue
	    else:
	        normality_p = 0.5

	    features = [loss_value, mean_length, length_spread, normality_p]
	    probabilities = lr_model.predict_proba([features])[0]

	    label_scores = {'Human': probabilities[0], 'AI': probabilities[1]}
	    return label_scores, highlighted_words

	# Text pre-filled in the input box when the page loads.
	sample_text = """
	The Saturn V is a type of rocket that was developed by NASA in the 1960s to support the Apollo program, which aimed to land humans on the Moon.
	It remains the most powerful rocket ever built, and its five F-1 engines generated more than 7.5 million pounds of thrust at liftoff.
	The Saturn V was used for all of the Apollo missions to the Moon, as well as the launch of the Skylab space station.
	Despite its impressive capabilities, the Saturn V was only used for a brief period of time before being retired in 1973.
	Nevertheless, it remains a landmark achievement in the history of space exploration and a symbol of human ingenuity and determination."""

	# One text input feeding score_text; its two return values go, in order,
	# to the Label and HighlightedText output components.
	demo = gr.Interface(
	    fn=score_text,
	    inputs=[gr.Textbox(label="Text to score", lines=5, value=sample_text)],
	    outputs=[gr.Label(), gr.HighlightedText()],
	)

	# Start the Gradio app as soon as the module is executed.
	demo.launch()