import json import random from pathlib import Path import gradio as gr import numpy as np from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline # Constants MIN_WORDS = 50 MAX_WORDS = 500 SAMPLE_JSON_PATH = Path('samples.json') # Load models def load_model(model_name): tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForSequenceClassification.from_pretrained(model_name) return pipeline('text-classification', model=model, tokenizer=tokenizer, truncation=True, max_length=512, top_k=4) classifier = load_model("microsoft/deberta-base") # Load sample essays with open(SAMPLE_JSON_PATH, 'r') as f: demo_essays = json.load(f) # Global variable to store the current essay index current_essay_index = None TEXT_CLASS_MAPPING = { 'LABEL_2': 'Machine-Generated', 'LABEL_0': 'Human-Written', 'LABEL_3': 'Machine-Written, Machine-Humanized', 'LABEL_1': 'Human-Written, Machine-Polished' } def process_result_detection_tab(text): result = classifier(text)[0] labels = [TEXT_CLASS_MAPPING[x['label']] for x in result] scores = list(np.array([x['score'] for x in result])) final_results = dict(zip(labels, scores)) # Return only the label with the highest score return max(final_results, key=final_results.get) def update_detection_tab(name): if name == '': return "" return process_result_detection_tab(name) def active_button_detection_tab(input_text): if not (50 <= len(input_text.split()) <= 500): return gr.Button("Check Origin", variant="primary", interactive=False) return gr.Button("Check Origin", variant="primary", interactive=True) def clear_detection_tab(): return "", gr.Button("Check Origin", variant="primary", interactive=False) def count_words_detection_tab(text): return f'{len(text.split())}/500 words (Minimum 50 words)' def generate_text_challenge_tab(): global index mg = gr.Button("Machine-Generated", variant="secondary", interactive=True) hw = gr.Button("Human-Written", variant="secondary", interactive=True) mh = gr.Button("Machine-Humanized", variant="secondary", interactive=True) mp = gr.Button("Machine-Polished", variant="secondary", interactive=True) index = random.choice(range(80)) essay = demo_essays[index][0] return essay, mg, hw, mh, mp, '' def correct_label_challenge_tab(): if 0 <= index < 20 : return 'Human-Written' elif 20 <= index < 40: return 'Machine-Generated' elif 40 <= index < 60: return 'Machine-Polished' elif 60 <= index < 80: return 'Machine-Humanized' def show_result_challenge_tab(button): correct_btn = correct_label_challenge_tab() mg = gr.Button("Machine-Generated", variant="secondary") hw = gr.Button("Human-Written", variant="secondary") mh = gr.Button("Machine-Humanized", variant="secondary") mp = gr.Button("Machine-Polished", variant="secondary") if button == 'Machine-Generated': mg = gr.Button("Machine-Generated", variant="stop") elif button == 'Human-Written': hw = gr.Button("Human-Written", variant="stop") elif button == 'Machine-Humanized': mh = gr.Button("Machine-Humanized", variant="stop") elif button == 'Machine-Polished': mp = gr.Button("Machine-Polished", variant="stop") if correct_btn == 'Machine-Generated': mg = gr.Button("Machine-Generated", variant="primary") elif correct_btn == 'Human-Written': hw = gr.Button("Human-Written", variant="primary") elif correct_btn == 'Machine-Humanized': mh = gr.Button("Machine-Humanized", variant="primary") elif correct_btn == 'Machine-Polished': mp = gr.Button("Machine-Polished", variant="primary") outcome = 'Correct' if button == correct_btn else 'Incorrect' return outcome, mg, hw, mh, mp css = """ body, .gradio-container { font-family: Arial, sans-serif; } .gr-input, .gr-textarea { } .class-intro { padding: 15px; margin-bottom: 20px; border-radius: 5px; } .class-intro h2 { margin-top: 0; } .class-intro p { margin-bottom: 5px; } """ class_intro_html = """
Human-Written: Original text created by humans.
Machine-Generated: Text created by AI from basic prompts, without style instructions.
Human-Written, Machine-Polished: Human text refined by AI for grammar and flow, without new content.
Machine-Written, Machine-Humanized: AI-generated text modified to mimic human writing style.