Spaces:

wiraindrak
/

entity-based-sentiment-analysis

Runtime error

File size: 3,725 Bytes

2168cf5
faf61e8
 
 
2168cf5
 
6ce2f8e
2168cf5
7f68476
 
2168cf5
7f68476
ef26fd6
7f68476
 
a60235f
2168cf5
 
7f68476
 
 
 
 
 
2168cf5
42535f1
 
 
2168cf5
 
ef26fd6
7f68476
d961c51
faf61e8
7f68476
 
 
2168cf5
1df8439
ea552db
1df8439
faf61e8
 
 
d453dbe
 
faf61e8
 
 
 
 
 
 
 
 
 
 
2168cf5
faf61e8
 
 
660b172
faf61e8
 
 
660b172
faf61e8
 
 
 
2dd816c
 
 
 
 
b99df17
2dd816c
660b172
 
2dd816c
faf61e8
 
 
 
 
 
1df8439
faf61e8
 
 
 
 
 
 
d6501eb
 
 
38d1024
 
 
faf61e8
 
 
 
 
 
 
d6501eb

from transformers import pipeline
import matplotlib.pyplot as plt
import twitter_scraper as ts


import gradio as gr
from gradio.mix import Parallel

# Hugging Face Hub identifiers. Each id is passed as both `model` and
# `tokenizer` when the corresponding pipeline is built below.
pretrained_sentiment = "w11wo/indonesian-roberta-base-sentiment-classifier"
pretrained_ner = "cahya/bert-base-indonesian-NER"

# Both pipelines are constructed once, at import time, and reused by the
# handler functions defined further down.
# `return_all_scores=True` requests a score for every label instead of only
# the best one; sentiment_analysis() indexes the result as output[0] and
# iterates over the per-label dicts, so it depends on that nested shape.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True
)

# Token-level named-entity recognition; its raw output is forwarded as the
# "entities" value returned by ner().
ner_pipeline = pipeline(
    "ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner
)

# Sample Indonesian sentences offered through gr.Examples in the
# "Single Input" tab.
# NOTE(review): "kebabasannya" in the last example looks like a typo for
# "kebebasannya" -- confirm before changing, since it is user-facing text.
examples = [
    "Jokowi sangat kecewa dengan POLRI atas kerusuhan yang terjadi di Malang",
    "Lesti marah terhadap perlakuan KDRT yang dilakukan oleh Bilar",
    "Ungkapan rasa bahagia diutarakan oleh Coki Pardede karena kebabasannya dari penjara"
]

def sentiment_analysis(text):
    """Score *text* against every sentiment label.

    Runs the module-level sentiment pipeline on ``text`` and flattens the
    per-label entries of its first result into a ``{label: score}`` dict,
    keeping the order in which the pipeline reported them.
    """
    per_label_entries = sentiment_pipeline(text)[0]
    scores_by_label = {}
    for entry in per_label_entries:
        scores_by_label[entry["label"]] = entry["score"]
    return scores_by_label

def ner(text):
    """Tag named entities in *text*.

    Returns a dict with the original string under ``"text"`` and the raw
    NER pipeline output under ``"entities"``; this is the payload wired to
    the ``gr.HighlightedText`` output in the UI.
    """
    entities = ner_pipeline(text)
    payload = dict(text=text, entities=entities)
    return payload

def sentiment_ner(text):
    """Run both analyses on *text*.

    Returns a 2-tuple: the ``sentiment_analysis`` result first, then the
    ``ner`` result, matching the order of the two UI outputs it feeds.
    """
    sentiment_scores = sentiment_analysis(text)
    entity_payload = ner(text)
    return sentiment_scores, entity_payload

def sentiment_df(df):
    """Annotate *df* in place with the predicted sentiment of each row.

    Args:
        df: pandas DataFrame with a ``"Text"`` column. Values are cast to
            ``str`` before classification.

    Returns:
        The same DataFrame object, with two columns added or overwritten:
        ``"Label"`` (the highest-scoring sentiment label) and ``"Score"``
        (that label's score rounded to 3 decimals).
    """
    texts = df["Text"].astype(str).tolist()

    # sentiment_analysis() returns a score for *every* label, in whatever
    # order the pipeline emits them (not sorted by score). Pick the best
    # (label, score) pair explicitly; taking the first dict entry would
    # assign the same label to every row.
    top_predictions = [
        max(sentiment_analysis(text).items(), key=lambda pair: pair[1])
        for text in texts
    ]

    df["Label"] = [label for label, _ in top_predictions]
    df["Score"] = [round(score, 3) for _, score in top_predictions]
    return df


def twitter_analyzer(keyword, max_tweets):
    """Scrape tweets for *keyword*, classify them and chart the label split.

    Args:
        keyword: search term forwarded to ``ts.scrape_tweets``.
        max_tweets: forwarded to ``ts.scrape_tweets`` as ``max_tweets``.

    Returns:
        A ``(figure, dataframe)`` tuple: a 6x6 inch matplotlib figure with a
        pie chart of row counts per sentiment label, and the scraped frame
        restricted to the ``URL``, ``Text``, ``Label`` and ``Score`` columns.
        When there are no rows the figure is returned with empty axes.
    """
    # The scraper result must expose "Text" and "URL" columns, since both
    # are indexed below.
    df = ts.scrape_tweets(keyword, max_tweets=max_tweets)
    df["Text"] = df["Text"].apply(ts.preprocess_text)
    df = sentiment_df(df)

    # Draw on an explicit Axes instead of pyplot's implicit "current figure",
    # which is process-global state shared by concurrent requests.
    fig, ax = plt.subplots(figsize=(6, 6))
    label_counts = df.groupby(["Label"])["Text"].count()
    if not label_counts.empty:
        # pandas refuses to plot an empty Series, so skip the pie in that case.
        label_counts.plot.pie(autopct="%.1f%%", ax=ax)

    # pyplot keeps every figure it creates alive until it is closed. Drop it
    # from the registry so repeated requests do not accumulate figures; the
    # Figure object itself stays valid for the caller to render.
    plt.close(fig)

    return fig, df[["URL", "Text", "Label", "Score"]]

if __name__ == "__main__":

    # Build the two-tab UI: a single-sentence analyzer and a Twitter keyword
    # analyzer. Components are created inside the Blocks context; the event
    # handlers are wired afterwards, still inside the context.
    with gr.Blocks() as demo:

        gr.Markdown("""<h1 style="text-align:center">Entity Based Sentiment Analysis Indonesia</h1>""")

        gr.Markdown(
            """
            Creator: Wira Indra Kusuma
            """
            )

        with gr.Tab("Single Input"):
            with gr.Blocks():
                with gr.Row():
                    with gr.Column():
                        input_text = gr.Textbox(label="Input Text")
                        # The caption of a Button is its `value` (first
                        # positional argument), same as the "Submit" button
                        # below; `label=` does not set the caption.
                        analyze_button = gr.Button("Analyze")
                        gr.Examples(examples=examples, inputs=input_text)
                    with gr.Column():
                        sent_output = gr.Label(label="Sentiment Analysis")
                        ner_output = gr.HighlightedText(label="Named Entity Recognition")

        with gr.Tab("Twitter"):
            with gr.Blocks():
                with gr.Row():
                    with gr.Column():
                        keyword_textbox = gr.Textbox(lines=1, label="Keyword")
                        # precision=0 makes the Number component yield an integer.
                        max_tweets_component = gr.Number(value=10, label="Total of Tweets to Scrape", precision=0)
                        submit_button = gr.Button("Submit")

                    plot_component = gr.Plot(label="Pie Chart of Sentiments")
                # `max_rows` is documented as a plain int; the (n, 'fixed')
                # tuple form belongs to `row_count`, not to this argument.
                dataframe_component = gr.DataFrame(type="pandas",
                                                label="Dataframe",
                                                max_rows=20,
                                                overflow_row_behaviour='paginate',
                                                wrap=True)

        # Single Input tab: one click fills both the sentiment label and the
        # highlighted-entity view.
        analyze_button.click(sentiment_ner, input_text, [sent_output, ner_output])
        # Twitter tab: scrape, classify, then show the pie chart and the table.
        submit_button.click(twitter_analyzer,
                    inputs=[keyword_textbox, max_tweets_component],
                    outputs=[plot_component, dataframe_component])

        gr.Markdown(
                """

                """

            )

    demo.launch(inbrowser=True)