Spaces:

ThanaphonJoe
/

hug101

Runtime error

App Files Files Community

ThanaphonJoe committed on Feb 24, 2024

Commit

77f9dcf

verified ·

1 Parent(s): 43452d4

test

Browse files

Files changed (1) hide show

app.py +144 -48

app.py CHANGED Viewed

@@ -1,15 +1,107 @@
 import gradio as gr
-import pickle
 import os
-import numpy as np
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import confusion_matrix
 import matplotlib.pyplot as plt
 import re
 from pythainlp.util import normalize
-from pythainlp.corpus import thai_stopwords
 from pythainlp.tokenize import word_tokenize
 def deEmojify(text):
@@ -37,13 +129,6 @@ def deEmojify(text):
 def clean_me(data):
-  stopwords = list(thai_stopwords())
-  stopwords.append("nan")
-  stopwords.append("-")
-  stopwords.append("_")
-  stopwords.append("")
-  stopwords.append(" ")
   data['clean_text'] = data['text'].str.replace(r'<[^<>]*>', '', regex=True)
   data['clean2_text']= data['clean_text'].str.strip().str.lower().str.replace('\r+', ' ').str.replace('\n+',' ').str.replace('\t+',' ')
   data['clean3_text'] = data.apply(lambda row: deEmojify(row['clean2_text']), axis=1)
@@ -54,16 +139,49 @@ def clean_me(data):
   # Join the wordsegged with space
   data['wordseged_space_text'] = data.apply(lambda row: " ".join(row["wordseged_text"]), axis=1)
   return(data)
 def combine(a, b):
     data = pd.DataFrame()
-    data['text'] = [a]
     data = clean_me(data)
-    a = data['wordseged_space_text'][0] + '123'
-    return a + " " + b
 def mirror(x):
@@ -72,36 +190,14 @@ def mirror(x):
 with gr.Blocks() as demo:
-    txt = gr.Textbox(label="Input", lines=2)
-    txt_2 = gr.Textbox(label="Input 2")
-    txt_3 = gr.Textbox(value="", label="Output")
-    btn = gr.Button(value="Submit")
-    btn.click(combine, inputs=[txt, txt_2], outputs=[txt_3])
-    with gr.Row():
-        im = gr.Image()
-        im_2 = gr.Image()
-    btn = gr.Button(value="Mirror Image")
-    btn.click(mirror, inputs=[im], outputs=[im_2])
-    gr.Markdown("## Text Examples")
-    gr.Examples(
-        [["hi", "Adam"], ["hello", "Eve"]],
-        [txt, txt_2],
-        txt_3,
-        combine,
-        cache_examples=True,
-    )
-    gr.Markdown("## Image Examples")
-    gr.Examples(
-        examples=[os.path.join(os.path.dirname(__file__), "lion.jpg")],
-        inputs=im,
-        outputs=im_2,
-        fn=mirror,
-        cache_examples=True,
-    )
 if __name__ == "__main__":

 import gradio as gr
 import os
 import matplotlib.pyplot as plt
+import pandas as pd
 import re
 from pythainlp.util import normalize
 from pythainlp.tokenize import word_tokenize
+from pythainlp import word_vector
+import numpy as np
+import keras
+import plotly.express as px
+#################
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+import time
+import chromedriver_autoinstaller
+import sys
+sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
+# setup chrome options
+chrome_options = webdriver.ChromeOptions()
+chrome_options.add_argument('--headless') # ensure GUI is off
+chrome_options.add_argument('--no-sandbox')
+chrome_options.add_argument('--disable-dev-shm-usage')
+# set path to chromedriver as per your configuration
+chromedriver_autoinstaller.install()
+wv = word_vector.WordVector()
+word2vec = wv.get_model()
+model= keras.models.load_model('my_model3.h5')
def get_comments(VIDEO_URL):
    """Scrape the comment texts currently rendered on a video page.

    Opens ``VIDEO_URL`` in a Chrome WebDriver configured with the
    module-level ``chrome_options``, scrolls to the end of the page once,
    and collects the text of every ``yt-formatted-string`` element whose
    id is ``content-text``.

    Arguments:
    VIDEO_URL -- URL of the page to open, e.g.
                 'https://www.youtube.com/watch?v=VIDEO_ID'

    Returns:
    data -- list of non-empty comment strings (empty list if none were found)
    """
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(VIDEO_URL)
        # Fixed pause so the page has time to render its comment section.
        time.sleep(5)
        # Jump to the bottom of the page once to trigger loading of comments.
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.END)
        time.sleep(2)
        comment_elements = driver.find_elements(
            By.XPATH, '//yt-formatted-string[@id="content-text"]'
        )
        data = []
        for comment in comment_elements:
            # Read .text once: every access is a round trip to the browser.
            # The emptiness check must be on the text, not on the element.
            text = comment.text
            if text != '':
                data.append(text)
                print(text)
    finally:
        # Always shut the browser down, even when scraping fails, so that
        # headless Chrome processes are not leaked.
        driver.quit()
    return data
def cosine_sim(u, v):
    """Return the cosine similarity between vectors ``u`` and ``v``.

    Arguments:
    u, v -- 1-D array-likes of equal length

    Returns:
    np.dot(u, v) / (||u|| * ||v||), or 0.0 when either vector has zero
    norm (the similarity is undefined there; returning 0.0 avoids a
    division by zero that would otherwise produce NaN and a warning).
    """
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    if denom == 0:
        return 0.0
    return np.dot(u, v) / denom
def sentences_to_indices(X, word2vec, max_len):
    """
    Converts a sequence of sentences (strings) into an array of indices corresponding to words in the sentences.
    The output shape should be such that it can be given to `Embedding()`.

    Each sentence is lower-cased, split on whitespace and truncated to its
    first `max_len` words. Words missing from `word2vec.key_to_index` are
    skipped, so the known words are packed at the start of the row and the
    remainder of the row stays 0.

    Arguments:
    X -- 1-D sequence of sentences (list or numpy array of strings), length m
    word2vec -- object exposing a `key_to_index` mapping from word to integer index
    max_len -- number of columns in the output; longer sentences are truncated

    Returns:
    X_indices -- float numpy array of shape (m, max_len) holding the word indices, zero-padded
    """
    m = len(X)                                       # number of sentences
    # Initialize X_indices as a numpy matrix of zeros and the correct shape
    X_indices = np.zeros((m, max_len))
    key_to_index = word2vec.key_to_index
    for i, sentence in enumerate(X):
        # Convert the ith sentence to lower case and split it into words.
        sentence_words = sentence.lower().split()[:max_len]
        # Keep only in-vocabulary words; the membership test makes the lookup safe.
        known = [key_to_index[w] for w in sentence_words if w in key_to_index]
        if known:
            X_indices[i, :len(known)] = known
    return X_indices
 def deEmojify(text):
 def clean_me(data):
   data['clean_text'] = data['text'].str.replace(r'<[^<>]*>', '', regex=True)
   data['clean2_text']= data['clean_text'].str.strip().str.lower().str.replace('\r+', ' ').str.replace('\n+',' ').str.replace('\t+',' ')
   data['clean3_text'] = data.apply(lambda row: deEmojify(row['clean2_text']), axis=1)
   # Join the wordsegged with space
   data['wordseged_space_text'] = data.apply(lambda row: " ".join(row["wordseged_text"]), axis=1)
   return(data)
def pretty_output(lines, sentiment):
    """Group input lines by predicted sentiment and chart the label counts.

    Arguments:
    lines -- sequence of the original text lines, aligned with `sentiment`
    sentiment -- 2-D array of per-line scores; the arg-max column picks the
                 label in the order ('Neg', 'Neu', 'Pos')

    Returns:
    (txt_pos, txt_neu, txt_neg, fig) -- one string per label, holding one
    "<label> <scores>:-<line>" entry per matching line, followed by a
    plotly bar chart of how many lines received each label.
    """
    label = np.array(['Neg', 'Neu', 'Pos'])
    predicted = label[np.argmax(sentiment, axis=1)]

    # Bar chart of label frequencies, x axis pinned to Neg / Neu / Pos order.
    counts = pd.Series(predicted).value_counts()
    frame = pd.DataFrame({'Sentiment': counts.index, 'Count': counts.values})
    fig = px.bar(frame, x='Sentiment', y='Count', color='Sentiment')
    fig.update_xaxes(categoryorder='array', categoryarray=['Neg', 'Neu', 'Pos'])

    # Collect the formatted entries per label, then join each bucket once.
    buckets = {'Pos': [], 'Neu': [], 'Neg': []}
    for text, tag, scores in zip(lines, predicted, sentiment):
        txt_score = [f"{i:.2f}" for i in scores]
        buckets[tag].append(f'{tag} {txt_score}:-{text} \n')

    txt_pos = ''.join(buckets['Pos'])
    txt_neu = ''.join(buckets['Neu'])
    txt_neg = ''.join(buckets['Neg'])
    return txt_pos, txt_neu, txt_neg, fig
 def combine(a, b):
     """Classify the sentiment of each input line and format the results.

     Arguments:
     a -- free text; each newline-separated line is classified separately
     b -- video URL; when non-empty, the comments returned by
          `get_comments(b)` are classified instead of `a`

     Returns:
     (txt_pos, txt_neu, txt_neg, fig) as produced by `pretty_output`.

     Raises:
     gr.Error -- when a URL was given but no comments could be retrieved,
                 so the UI shows a readable message instead of failing
                 later on an empty DataFrame.
     """
     lines = a.split('\n')
     if b != "":
         lines = get_comments(b)
         if not lines:
             text001 = 'CANNOT_GET DATA from Youtube'
             print(text001)
             raise gr.Error(text001)
     data = pd.DataFrame({'text': lines})
     data = clean_me(data)
     # 128 is the sequence length passed to the model for every sentence.
     X_train_indices = sentences_to_indices(data['wordseged_space_text'].values, word2vec, 128)
     result = model.predict(X_train_indices)
     return pretty_output(lines, result)
 def mirror(x):
 # Gradio UI: two text inputs (free text and a video URL), a submit button,
 # three text outputs (one per sentiment label) and a plot.
 with gr.Blocks() as demo:
     # Inputs passed to combine() as (a, b).
+    txt   = gr.Textbox(label="Input: TEXT", lines=2)
+    txt_2 = gr.Textbox(label="Input: Youtube URL")
+    btn   = gr.Button(value="Submit")
     # Outputs, in the same order as the tuple returned by combine().
+    txt_POS = gr.Textbox(value="", label="Positive comments")
+    txt_NEU = gr.Textbox(value="", label="Neutral comments")
+    txt_NEG = gr.Textbox(value="", label="Negative comments")
+    plot = gr.Plot(label="Plot")
     # Clicking the button runs combine() on the two inputs and fills the four outputs.
+    btn.click(combine, inputs=[txt, txt_2], outputs=[txt_POS, txt_NEU, txt_NEG, plot])
 if __name__ == "__main__":