huamnifierWithSimpleGrammer

Sleeping

App Files Files

sashtech committed on Aug 31

Commit

7fc55d1

•

1 Parent(s): 6ba2176

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -37

app.py CHANGED Viewed

@@ -1,20 +1,16 @@
-# Import dependencies
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-import nltk
-from nltk.corpus import wordnet
 import spacy
 import subprocess
-from gensim.models import KeyedVectors
 from gensim import downloader as api
-from nltk.tokenize import word_tokenize
-# Download NLTK data (if not already downloaded)
-nltk.download('punkt')
-nltk.download('stopwords')
 # Ensure the spaCy model is installed
 try:
@@ -42,37 +38,42 @@ def detect_ai_generated(text):
     ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
     return f"AI-Generated Content Probability: {ai_probability:.2f}%"
-# Function to get synonyms using Gensim Word2Vec
-def get_synonyms_gensim(word):
-    try:
-        synonyms = word_vectors.most_similar(positive=[word], topn=5)
-        return [synonym[0] for synonym in synonyms]
-    except KeyError:
-        return []
-# Paraphrasing function using Gensim for synonym replacement
-def paraphrase_with_gensim(text):
-    words = word_tokenize(text)
-    paraphrased_words = []
-    for word in words:
-        synonyms = get_synonyms_gensim(word.lower())
-        if synonyms:
-            paraphrased_words.append(synonyms[0])
-        else:
-            paraphrased_words.append(word)
-    return ' '.join(paraphrased_words)
-# Paraphrasing function using spaCy for synonym replacement
-def paraphrase_with_spacy(text):
     doc = nlp(text)
     paraphrased_words = []
     for token in doc:
-        synonyms = get_synonyms_gensim(token.text.lower())
-        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}:  # Only replace certain types of words
             paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)
-    return ' '.join(paraphrased_words)
 # Gradio interface definition
 with gr.Blocks() as interface:
@@ -80,14 +81,12 @@ with gr.Blocks() as interface:
         with gr.Column():
             text_input = gr.Textbox(lines=5, label="Input Text")
             detect_button = gr.Button("AI Detection")
-            paraphrase_gensim_button = gr.Button("Paraphrase with Gensim")
-            paraphrase_spacy_button = gr.Button("Paraphrase with spaCy")
         with gr.Column():
             output_text = gr.Textbox(label="Output")
     detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
-    paraphrase_gensim_button.click(paraphrase_with_gensim, inputs=text_input, outputs=output_text)
-    paraphrase_spacy_button.click(paraphrase_with_spacy, inputs=text_input, outputs=output_text)
 # Launch the Gradio app
 interface.launch(debug=False)

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import spacy
 import subprocess
+import nltk
+from nltk.corpus import wordnet
 from gensim import downloader as api
+# Ensure necessary NLTK data is downloaded
+nltk.download('wordnet')
+nltk.download('omw-1.4')
 # Ensure the spaCy model is installed
 try:
     ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
     return f"AI-Generated Content Probability: {ai_probability:.2f}%"
+# Function to get synonyms using NLTK WordNet
+def get_synonyms_nltk(word, pos):
+    synsets = wordnet.synsets(word, pos=pos)
+    if synsets:
+        lemmas = synsets[0].lemmas()
+        return [lemma.name() for lemma in lemmas]
+    return []
+# Paraphrasing function using spaCy and NLTK
+def paraphrase_with_spacy_nltk(text):
     doc = nlp(text)
     paraphrased_words = []
     for token in doc:
+        # Map spaCy POS tags to WordNet POS tags
+        pos = None
+        if token.pos_ in {"NOUN"}:
+            pos = wordnet.NOUN
+        elif token.pos_ in {"VERB"}:
+            pos = wordnet.VERB
+        elif token.pos_ in {"ADJ"}:
+            pos = wordnet.ADJ
+        elif token.pos_ in {"ADV"}:
+            pos = wordnet.ADV
+        synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
+        # Replace with a synonym only if it makes sense
+        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
             paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)
+    # Join the words back into a sentence
+    paraphrased_sentence = ' '.join(paraphrased_words)
+    return paraphrased_sentence
 # Gradio interface definition
 with gr.Blocks() as interface:
         with gr.Column():
             text_input = gr.Textbox(lines=5, label="Input Text")
             detect_button = gr.Button("AI Detection")
+            paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK")
         with gr.Column():
             output_text = gr.Textbox(label="Output")
     detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
+    paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)
 # Launch the Gradio app
 interface.launch(debug=False)