sashtech committed on
Commit
7fc55d1
1 Parent(s): 6ba2176

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -37
app.py CHANGED
@@ -1,20 +1,16 @@
1
- # Import dependencies
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import torch
5
- import nltk
6
- from nltk.corpus import wordnet
7
  import spacy
8
  import subprocess
 
 
9
 
10
- from gensim.models import KeyedVectors
11
  from gensim import downloader as api
12
- from nltk.tokenize import word_tokenize
13
-
14
# Make sure the NLTK tokenizer and stop-word resources are available.
for _resource in ('punkt', 'stopwords'):
    nltk.download(_resource)
17
 
 
 
 
18
 
19
  # Ensure the spaCy model is installed
20
  try:
@@ -42,37 +38,42 @@ def detect_ai_generated(text):
42
  ai_probability = probabilities[0][1].item() # Probability of being AI-generated
43
  return f"AI-Generated Content Probability: {ai_probability:.2f}%"
44
 
45
- # Function to get synonyms using Gensim Word2Vec
46
def get_synonyms_gensim(word):
    """Return the top five ``most_similar`` matches for *word*.

    The lookup goes through the module-level ``word_vectors`` object and
    only the first element of each returned entry is kept.

    Returns:
        A list of up to five strings, or an empty list when the lookup
        raises ``KeyError`` (presumably an out-of-vocabulary word --
        confirm against the loaded model).
    """
    try:
        neighbours = word_vectors.most_similar(positive=[word], topn=5)
    except KeyError:
        return []
    return [entry[0] for entry in neighbours]
52
-
53
- # Paraphrasing function using Gensim for synonym replacement
54
def paraphrase_with_gensim(text):
    """Swap every token of *text* for its first Gensim match, if it has one.

    Tokens come from ``word_tokenize``; each one is looked up in lower case
    through ``get_synonyms_gensim``. Tokens without a match are kept as they
    are, and the resulting pieces are joined with single spaces.
    """
    def _swap(token):
        # Fall back to the untouched token when the lookup yields nothing.
        candidates = get_synonyms_gensim(token.lower())
        return candidates[0] if candidates else token

    return ' '.join(_swap(token) for token in word_tokenize(text))
64
 
65
- # Paraphrasing function using spaCy for synonym replacement
66
def paraphrase_with_spacy(text):
    """Paraphrase *text* by replacing content words with a Gensim match.

    The text is parsed with the module-level spaCy pipeline ``nlp``. Only
    tokens tagged NOUN, VERB, ADJ or ADV are candidates; each is looked up
    in lower case through ``get_synonyms_gensim`` and replaced by the first
    result when there is one. All other tokens are copied through.

    Args:
        text: The input string to paraphrase.

    Returns:
        The paraphrased string. Each token keeps the whitespace that
        followed it in the input, so punctuation stays attached to the
        preceding word instead of being separated by a space.
    """
    replaceable_pos = {"NOUN", "VERB", "ADJ", "ADV"}

    pieces = []
    for token in nlp(text):
        replacement = token.text
        # Check the POS tag first so the vector lookup is skipped for
        # tokens that would never be replaced anyway.
        if token.pos_ in replaceable_pos:
            synonyms = get_synonyms_gensim(token.text.lower())
            if synonyms:
                replacement = synonyms[0]
        # whitespace_ is the token's own trailing whitespace ('' or ' ').
        pieces.append(replacement + token.whitespace_)
    return ''.join(pieces)
 
 
 
76
 
77
  # Gradio interface definition
78
  with gr.Blocks() as interface:
@@ -80,14 +81,12 @@ with gr.Blocks() as interface:
80
  with gr.Column():
81
  text_input = gr.Textbox(lines=5, label="Input Text")
82
  detect_button = gr.Button("AI Detection")
83
- paraphrase_gensim_button = gr.Button("Paraphrase with Gensim")
84
- paraphrase_spacy_button = gr.Button("Paraphrase with spaCy")
85
  with gr.Column():
86
  output_text = gr.Textbox(label="Output")
87
 
88
  detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
89
- paraphrase_gensim_button.click(paraphrase_with_gensim, inputs=text_input, outputs=output_text)
90
- paraphrase_spacy_button.click(paraphrase_with_spacy, inputs=text_input, outputs=output_text)
91
 
92
  # Launch the Gradio app
93
  interface.launch(debug=False)
 
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
 
 
4
  import spacy
5
  import subprocess
6
+ import nltk
7
+ from nltk.corpus import wordnet
8
 
 
9
  from gensim import downloader as api
 
 
 
 
 
10
 
11
# Fetch the WordNet resources used for synonym lookup before anything queries them.
for _resource in ('wordnet', 'omw-1.4'):
    nltk.download(_resource)
14
 
15
  # Ensure the spaCy model is installed
16
  try:
 
38
  ai_probability = probabilities[0][1].item() # Probability of being AI-generated
39
  return f"AI-Generated Content Probability: {ai_probability:.2f}%"
40
 
41
+ # Function to get synonyms using NLTK WordNet
42
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset found for *word*.

    Args:
        word: The word to look up.
        pos: WordNet part-of-speech constant, passed to ``wordnet.synsets``.

    Returns:
        Lemma names of the first matching synset, in the order WordNet
        returns them, or an empty list when no synset matches. The
        underscores WordNet uses inside multi-word lemma names are turned
        into spaces so the names can be shown to users directly. The list
        may contain *word* itself, so callers should compare before
        substituting.
    """
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    # Lemma names are stored as e.g. "domestic_dog"; emit readable text.
    return [lemma.name().replace("_", " ") for lemma in synsets[0].lemmas()]
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ # Paraphrasing function using spaCy and NLTK
50
def paraphrase_with_spacy_nltk(text):
    """Paraphrase *text* by replacing content words with a WordNet synonym.

    The text is parsed with the module-level spaCy pipeline ``nlp``. Tokens
    tagged NOUN, VERB, ADJ or ADV are looked up in lower case through
    ``get_synonyms_nltk`` with the matching WordNet POS; the first synonym
    is used unless it equals the lower-cased token. Every other token is
    copied through unchanged.

    Args:
        text: The input string to paraphrase.

    Returns:
        The paraphrased string. Each token keeps the whitespace that
        followed it in the input, so punctuation is not pushed away from
        the preceding word, and a replacement for a capitalised token is
        capitalised as well.
    """
    # spaCy coarse POS tag -> WordNet POS constant. Tags that are absent
    # here are never replaced.
    pos_map = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    pieces = []
    for token in nlp(text):
        original = token.text
        replacement = original

        pos = pos_map.get(token.pos_)
        if pos is not None:
            lowered = original.lower()
            synonyms = get_synonyms_nltk(lowered, pos)
            if synonyms:
                # WordNet joins multi-word lemmas with underscores.
                candidate = synonyms[0].replace("_", " ")
                # Replacing a word with itself is pointless; keep the
                # original token (and its casing) in that case.
                if candidate != lowered:
                    if original[:1].isupper():
                        candidate = candidate[:1].upper() + candidate[1:]
                    replacement = candidate

        # whitespace_ is the token's own trailing whitespace ('' or ' ').
        pieces.append(replacement + token.whitespace_)

    return ''.join(pieces)
77
 
78
  # Gradio interface definition
79
  with gr.Blocks() as interface:
 
81
  with gr.Column():
82
  text_input = gr.Textbox(lines=5, label="Input Text")
83
  detect_button = gr.Button("AI Detection")
84
+ paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK")
 
85
  with gr.Column():
86
  output_text = gr.Textbox(label="Output")
87
 
88
  detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
89
+ paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)
 
90
 
91
  # Launch the Gradio app
92
  interface.launch(debug=False)