Spaces:

demomern
/

Apps-Reviews-for-Requirements-Elicitation

Sleeping

App Files Files Community

demomern committed on Sep 9, 2023

Commit

3ca505b

1 Parent(s): 97f9f67

Create app.py

Browse files

Files changed (1) hide show

app.py +92 -0

app.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import re
+import emoji
+import spacy
+import joblib
+from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import LabelEncoder
+from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
+import gradio as gr
# Shared spaCy English pipeline; clean_review_text() runs every review through it.
nlp = spacy.load("en_core_web_sm")

# Restore the previously fitted TF-IDF vectorizer from disk.
# NOTE(review): joblib.load unpickles its input, so these .pkl files must
# come from a trusted source.
cv = joblib.load("tfidf_vectorizer.pkl")

# Restore the MLP classifier whose outputs are paired with `label`.
mlp_label = joblib.load("mlpLabel.pkl")

# Restore the MLP classifier whose outputs are paired with `aspect_label`.
mlp_aspect_label = joblib.load("mlpAspectLabel.pkl")
# Matches "<", then everything up to and including the next ">".
# A negated character class is used instead of ".*?" so that a tag whose
# attributes wrap onto another line is still matched ("." stops at newlines).
_HTML_TAG_RE = re.compile(r"<[^>]*>")


def remove_html(text):
    """Return *text* with HTML-style tags removed.

    Anything between a "<" and the nearest following ">" (inclusive) is
    deleted; the text between tags is kept untouched. A "<" with no
    later ">" is left in place.

    Args:
        text: The string to clean.

    Returns:
        The input string with every tag-like span stripped out.
    """
    return _HTML_TAG_RE.sub("", text)
# Two alternatives: an explicit http(s):// URL, or a bare "www." host.
# - IGNORECASE: catches "HTTPS://..." and "WWW...." as well.
# - \b before "www": stops the pattern from firing inside a longer word
#   such as "awww.so", which would otherwise lose part of the sentence.
_URL_RE = re.compile(r"https?://\S+|\bwww\.\S+", re.IGNORECASE)


def remove_url(text):
    """Return *text* with web addresses removed.

    A URL is taken to be either ``http://`` / ``https://`` followed by
    non-whitespace characters, or a word starting with ``www.`` followed
    by non-whitespace characters. Matching ignores letter case.
    Surrounding whitespace is left as-is.

    Args:
        text: The string to clean.

    Returns:
        The input string with every matched URL deleted.
    """
    return _URL_RE.sub("", text)
def emoji_to_text(text):
    """Spell out every emoji character in *text* as words.

    The string is walked one character at a time. A character that
    ``emoji.is_emoji`` accepts is replaced by the result of
    ``emoji.demojize`` for that character, padded with one space on each
    side; every other character is copied through unchanged.
    """
    pieces = (
        f" {emoji.demojize(symbol)} " if emoji.is_emoji(symbol) else symbol
        for symbol in text
    )
    return "".join(pieces)
def clean_review_text(text):
    """Normalise a raw review into a space-separated string of lemmas.

    Processing order:
      1. strip HTML tags (``remove_html``)
      2. strip URLs (``remove_url``)
      3. spell out emoji (``emoji_to_text``)
      4. lower-case the result
      5. run the spaCy pipeline and keep the lemma of each token that is
         not a stop word, not punctuation and not whitespace
    """
    # Text-level scrubbers, applied in sequence; each takes and returns a str.
    cleaned = text
    for scrub in (remove_html, remove_url, emoji_to_text):
        cleaned = scrub(cleaned)
    cleaned = cleaned.lower()

    def carries_content(token):
        # Reject stop words ("is", "the", ...), punctuation and whitespace tokens.
        return not token.is_stop and not token.is_punct and not token.is_space

    # token.lemma_ is the token's base form (e.g. "went" -> "go").
    lemmas = [token.lemma_ for token in nlp(cleaned) if carries_content(token)]
    return " ".join(lemmas)
# Display names for the outputs of `mlp_label`; return_label_aspect() pairs
# entry i with probability i.
label = ["negative", "neutral", "positive"]

# Display names for the outputs of `mlp_aspect_label`, paired by index in the
# same way.
aspect_label = [
    "Card Decks and Challenges",
    "Card Play and Board Games",
    "Fun and Coin Collecting",
    "Game Scores and Features",
    "Game Updates and User Desires",
    "Gameplay and App Experience",
    "Gameplay and Trading",
    "Gameplay and User Experience",
    "Property and Land Management",
    "Subway Adventures",
]
def return_label_aspect(Review):
    """Score one review with both classifiers.

    The review is cleaned with ``clean_review_text``, vectorised with the
    loaded vectorizer ``cv``, and passed to ``mlp_label`` and
    ``mlp_aspect_label``. Each probability vector is paired position by
    position with the names in ``label`` / ``aspect_label``.

    Args:
        Review: Raw review text. NOTE(review): the capitalised name is
            presumably deliberate, since Gradio can derive the input's
            caption from the parameter name - confirm before renaming.

    Returns:
        A tuple ``(pred_label, pred_aspect)`` of dicts mapping each class
        name to its probability rounded to two decimals.
    """
    features = cv.transform([clean_review_text(Review)])

    # predict_proba returns one row per input sample; there is exactly one.
    label_probs = mlp_label.predict_proba(features)[0]
    aspect_probs = mlp_aspect_label.predict_proba(features)[0]

    # Pair names with probabilities by position rather than by hard-coded
    # lengths, so the name lists remain the single source of truth.
    # Assumes the lists are in the same order as each classifier's output
    # columns - TODO confirm against the training code.
    # float() turns NumPy scalars into plain Python floats before rounding.
    pred_label = {
        name: round(float(prob), 2) for name, prob in zip(label, label_probs)
    }
    pred_aspect = {
        name: round(float(prob), 2)
        for name, prob in zip(aspect_label, aspect_probs)
    }
    return pred_label, pred_aspect
# Web UI: one free-text input feeding return_label_aspect, whose two returned
# dicts are shown in two Label components (first dict, then second).
iface = gr.Interface(
    fn=return_label_aspect,
    inputs="text",
    outputs=[gr.Label(), gr.Label()],
)

# Start the app as soon as this module is executed.
iface.launch(inline=False)