Spaces:
Running
Running
File size: 7,844 Bytes
10f3d6f 880a0f2 ed6e5bf f21a673 ed6e5bf f21a673 ed6e5bf f21a673 ed6e5bf f21a673 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 cdbcd6b ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 f21a673 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf 7df1b03 ed6e5bf f21a673 7df1b03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
import json
import random
from pathlib import Path
import gradio as gr
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
# Constants
# Word-count bounds for text submitted on the detection tab.
MIN_WORDS = 50
MAX_WORDS = 500
# JSON file the sample essays are loaded from (relative to the working directory).
SAMPLE_JSON_PATH = Path('samples.json')
# Load models
def load_model(model_name):
    """Build a text-classification pipeline for the checkpoint ``model_name``.

    Both the tokenizer and the sequence-classification model are loaded from
    ``model_name``. The returned pipeline truncates inputs to 512 tokens and
    is configured with ``top_k=4``.
    """
    return pipeline(
        'text-classification',
        # Keyword arguments are evaluated left to right, so the tokenizer is
        # still loaded before the model.
        tokenizer=AutoTokenizer.from_pretrained(model_name),
        model=AutoModelForSequenceClassification.from_pretrained(model_name),
        truncation=True,
        max_length=512,
        top_k=4,
    )
# Build the classification pipeline once at import time; the detection-tab
# helpers read it through this module-level name.
classifier = load_model("./fine-tuned-distillberta")
# Load sample essays
# The encoding is given explicitly so that reading the JSON file does not
# depend on the platform's default locale encoding.
with open(SAMPLE_JSON_PATH, 'r', encoding='utf-8') as f:
    demo_essays = json.load(f)
# Global variable to store the current essay index
# NOTE(review): the challenge-tab helpers declare ``global index`` rather than
# using this name -- confirm which of the two is intended.
current_essay_index = None
# Maps the label names looked up from the classifier output to the display
# names returned by the detection tab.
TEXT_CLASS_MAPPING = {
    'llm': 'Machine Generated',
    'human': 'Human Written',
    'machine-humanized': 'Machine Written, Machine Humanized',
    'machine-polished': 'Human Written, Machine Polished'
}
def process_result_detection_tab(text):
    """Classify ``text`` and return the display name of the best-scoring class.

    The first element of the classifier output is treated as a list of
    ``{'label': ..., 'score': ...}`` entries. Every label is translated
    through ``TEXT_CLASS_MAPPING`` (an unknown label raises ``KeyError``),
    and the display name with the highest score is returned.
    """
    predictions = classifier(text)[0]
    score_by_name = {
        TEXT_CLASS_MAPPING[entry['label']]: entry['score']
        for entry in predictions
    }
    # Return only the label with the highest score
    return max(score_by_name, key=score_by_name.get)
def update_detection_tab(name):
    """Return the detection result for the submitted text ``name``.

    An empty string is passed straight through as ``""`` without running the
    classifier; any other value is handed to ``process_result_detection_tab``.
    """
    return "" if name == '' else process_result_detection_tab(name)
def active_button_detection_tab(input_text):
    """Return the "Check Origin" button, enabled only for an accepted length.

    ``input_text`` is split on whitespace to count words. The button is
    interactive when the count lies between ``MIN_WORDS`` and ``MAX_WORDS``
    inclusive, and disabled otherwise.
    """
    word_count = len(input_text.split())
    # Use the module constants instead of repeating the literal limits here.
    within_limits = MIN_WORDS <= word_count <= MAX_WORDS
    return gr.Button("Check Origin", variant="primary", interactive=within_limits)
def clear_detection_tab():
    """Return the reset state for the detection tab.

    The result is a pair: an empty string for the text box and a disabled
    "Check Origin" button.
    """
    disabled_check = gr.Button("Check Origin", variant="primary", interactive=False)
    return "", disabled_check
def count_words_detection_tab(text):
    """Return the word-count caption shown under the detection text box.

    Words are counted by splitting ``text`` on whitespace. The limits in the
    caption come from ``MIN_WORDS`` and ``MAX_WORDS`` so the message cannot
    drift away from the configured bounds.
    """
    word_count = len(text.split())
    return f'{word_count}/{MAX_WORDS} words (Minimum {MIN_WORDS} words)'
def generate_text_challenge_tab():
    """Pick a random sample essay and enable the four guess buttons.

    Stores the chosen position in the module-level ``index`` (read later by
    ``correct_label_challenge_tab``) and returns a 6-tuple: the essay text,
    the four interactive buttons in the order Machine-Generated,
    Human-Written, Machine-Humanized, Machine-Polished, and an empty string
    for the result label.
    """
    # NOTE(review): ``index`` is a module-level global, so it is shared by
    # everything that calls into this module -- confirm that is acceptable.
    global index
    guess_labels = (
        "Machine-Generated",
        "Human-Written",
        "Machine-Humanized",
        "Machine-Polished",
    )
    guess_buttons = [
        gr.Button(caption, variant="secondary", interactive=True)
        for caption in guess_labels
    ]
    index = random.randrange(80)
    # The essay text is the first element of each sample entry.
    return (demo_essays[index][0], *guess_buttons, '')
def correct_label_challenge_tab():
    """Return the class name for the essay position stored in ``index``.

    Positions are grouped in bands of twenty: 0-19 Human-Written, 20-39
    Machine-Generated, 40-59 Machine-Polished, 60-79 Machine-Humanized.
    Any position outside 0-79 yields ``None``.
    """
    if index < 0:
        return None
    # Exclusive upper bound of each band, in ascending order.
    bands = (
        (20, 'Human-Written'),
        (40, 'Machine-Generated'),
        (60, 'Machine-Polished'),
        (80, 'Machine-Humanized'),
    )
    for upper_bound, class_name in bands:
        if index < upper_bound:
            return class_name
    return None
def show_result_challenge_tab(button):
    """Score a guess and recolour the four guess buttons.

    ``button`` is the caption of the button that was clicked. The button
    matching the correct class gets the "primary" variant, a wrong guess gets
    "stop", and the rest stay "secondary". Returns a 5-tuple: 'Correct' or
    'Incorrect', followed by the buttons in the order Machine-Generated,
    Human-Written, Machine-Humanized, Machine-Polished.
    """
    answer = correct_label_challenge_tab()

    def variant_for(caption):
        # The correct answer takes precedence over the clicked button.
        if caption == answer:
            return "primary"
        if caption == button:
            return "stop"
        return "secondary"

    captions = (
        "Machine-Generated",
        "Human-Written",
        "Machine-Humanized",
        "Machine-Polished",
    )
    recoloured = [gr.Button(caption, variant=variant_for(caption)) for caption in captions]
    verdict = 'Correct' if button == answer else 'Incorrect'
    return (verdict, *recoloured)
# Custom stylesheet passed to gr.Blocks: sets the font and dark colours for
# buttons, inputs and forms, and styles the "class-intro" panel.
css = """
body, .gradio-container {
font-family: Arial, sans-serif;
}
.gr-button {
background-color: #1e1e1e;
border: 1px solid #333333;
color: #ffffff;
}
.gr-button:hover {
background-color: #2e2e2e;
}
.gr-input, .gr-textarea {
background-color: #1f2937;
border: 1px solid #333333;
color: #ffffff;
}
.gr-form {
background-color: #1f2937;
border: 1px solid #333333;
}
.class-intro {
background-color: #1f2937;
border: 1px solid #333333;
padding: 15px;
margin-bottom: 20px;
border-radius: 5px;
}
.class-intro h2 {
margin-top: 0;
color: #ffffff;
}
.class-intro p {
margin-bottom: 5px;
}
"""
# HTML panel rendered at the top of the detection tab; describes the four
# text classes using the "class-intro" CSS class.
class_intro_html = """
<div class="class-intro">
<h2>Text Classes</h2>
<p><strong>Human Written:</strong> Original text created by humans.</p>
<p><strong>Machine Generated:</strong> Text created by AI from basic prompts, without style instructions.</p>
<p><strong>Human Written, Machine Polished:</strong> Human text refined by AI for grammar and flow, without new content.</p>
<p><strong>Machine Written, Machine Humanized:</strong> AI-generated text modified to mimic human writing style.</p>
</div>
"""
# Two-tab UI: "Try it!" runs the classifier on pasted text, and
# "Challenge Yourself!" asks the user to guess the class of a sample essay.
with gr.Blocks(css=css) as demo:
    # The opening tag was misspelled "<centre>", leaving "</center>" unmatched.
    gr.Markdown("""<h1><center>Machine Generated Text (MGT) Detection</center></h1>""")

    with gr.Tab('Try it!'):
        gr.HTML(class_intro_html)
        with gr.Row():
            input_text = gr.Textbox(placeholder="Paste your text here...", label="Text", lines=10, max_lines=15)
        with gr.Row():
            wc = gr.Markdown("0/500 words (Minimum 50 words)")
        with gr.Row():
            check_button = gr.Button("Check Origin", variant="primary", interactive=False)
            clear_button = gr.ClearButton([input_text], variant="stop")
        # NOTE(review): nesting was reconstructed from unindented source; the
        # result label is assumed to sit below the button row -- confirm.
        out = gr.Label(label='Result')
        clear_button.add(out)

        check_button.click(fn=update_detection_tab, inputs=[input_text], outputs=out)
        # Keep the word-count caption in sync with the text box.
        input_text.change(count_words_detection_tab, input_text, wc, show_progress=False)
        # Enable or disable the check button as the user types.
        input_text.input(
            active_button_detection_tab,
            [input_text],
            [check_button],
        )
        # Clearing also empties the text box and disables the check button.
        clear_button.click(
            clear_detection_tab,
            inputs=[],
            outputs=[input_text, check_button],
        )

    with gr.Tab('Challenge Yourself!'):
        with gr.Row():
            generate = gr.Button("Generate Sample Text", variant="primary")
            clear = gr.ClearButton([], variant="stop")
        with gr.Row():
            text = gr.Textbox(value="", label="Text", lines=20, interactive=False)
        with gr.Row():
            # Guess buttons start disabled until a sample has been generated.
            mg = gr.Button("Machine-Generated", variant="secondary", interactive=False)
            hw = gr.Button("Human-Written", variant="secondary", interactive=False)
            mh = gr.Button("Machine-Humanized", variant="secondary", interactive=False)
            mp = gr.Button("Machine-Polished", variant="secondary", interactive=False)
        with gr.Row():
            result = gr.Label(label="Result", value="")
        clear.add([result, text])

        generate.click(generate_text_challenge_tab, [], [text, mg, hw, mh, mp, result])
        # Each guess button passes itself as the input to the handler.
        for button in [mg, hw, mh, mp]:
            button.click(show_result_challenge_tab, [button], [result, mg, hw, mh, mp])
        # Reset the tab: empty text and result, guess buttons disabled again.
        clear.click(lambda: ("",
                             gr.Button("Machine-Generated", variant="secondary", interactive=False),
                             gr.Button("Human-Written", variant="secondary", interactive=False),
                             gr.Button("Machine-Humanized", variant="secondary", interactive=False),
                             gr.Button("Machine-Polished", variant="secondary", interactive=False),
                             ""),
                    outputs=[text, mg, hw, mh, mp, result])

demo.launch(share=False)
|