File size: 2,778 Bytes
5fcd3cf
edc005c
5fcd3cf
 
 
edc005c
5fcd3cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8de989
 
 
 
 
 
 
edc005c
c8de989
5fcd3cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8de989
 
 
5fcd3cf
 
 
 
 
 
 
 
 
c8de989
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from ast import Str
import gradio as gr
from tweetnlp import Sentiment, NER
from typing import Tuple, Dict
from statistics import mean

def clean_tweet(tweet: str, remove_chars: str = "@#") -> str:
    """Strip every occurrence of the given characters from a tweet.

    Args:
        tweet (str): The raw tweet text.
        remove_chars (str, optional): Each character in this string is
            deleted wherever it appears in the tweet. Defaults to "@#".

    Returns:
        str: The tweet with all of those characters removed.
    """
    # A deletion-only translation table drops all unwanted characters in one pass.
    deletion_table = str.maketrans("", "", remove_chars)
    return tweet.translate(deletion_table)


def format_sentiment(model_output: Dict) -> Dict:
    """Format the output of the sentiment model for a gradio label output.

    Args:
        model_output (Dict): The model output. Must contain a "label" key
            and a "probability" key. The probability is treated as a single
            number for the predicted label (assumption - confirm against the
            tweetnlp version in use).

    Returns:
        Dict: A mapping of label name to confidence. For a "positive" or
        "negative" prediction both labels are present, the other one
        receiving the complement ``1 - probability``. Any other label
        (e.g. "neutral") is reported on its own under its own name.

    Raises:
        KeyError: If "label" or "probability" is missing from model_output.
    """
    label = model_output["label"]
    probability = model_output["probability"]

    # Do not fold unknown labels into "negative": a prediction such as
    # "neutral" would otherwise be displayed as a confident negative.
    if label not in ("positive", "negative"):
        return {label: probability}

    opposite = "negative" if label == "positive" else "positive"
    return {label: probability, opposite: 1 - probability}


def format_entities(model_output: Dict) -> Dict:
    """Format the output of the NER model for a gradio label output.

    Args:
        model_output (Dict): The model output. Its "entity_prediction" entry
            is iterated; each item must provide "entity" (an iterable of
            strings), "type" and "probability" (a non-empty iterable of
            numbers).

    Returns:
        Dict: Maps "<entity words joined by spaces>:<type>" to the mean of
        that entity's probabilities. If the same name/type pair occurs more
        than once, the last occurrence wins.

    Raises:
        KeyError: If an expected key is missing.
        statistics.StatisticsError: If an entity's probability list is empty.
    """
    formatted_output = {}
    for entity in model_output["entity_prediction"]:
        name = " ".join(entity["entity"])
        entity_type = entity["type"]
        # One score per entity: average the individual probabilities.
        formatted_output[f"{name}:{entity_type}"] = mean(entity["probability"])
    return formatted_output


def classify(tweet: str) -> Tuple[Dict, Dict]:
    """Clean a tweet and run both the sentiment and the NER model on it.

    Relies on the module-level ``se_model`` and ``ner_model`` objects, which
    are created in the ``__main__`` section of this script.

    Args:
        tweet (str): The raw tweet

    Returns:
        Tuple[Dict, Dict]: The formatted sentiment followed by the formatted
        entities of the tweet
    """
    cleaned = clean_tweet(tweet)
    # Sentiment first, then entities, both on the cleaned text
    sentiment_scores = format_sentiment(se_model.sentiment(cleaned))
    entity_scores = format_entities(ner_model.ner(cleaned))
    return sentiment_scores, entity_scores


if __name__ == "__main__":
    # Models come from https://github.com/cardiffnlp/tweetnlp
    # They are bound at module level because classify() looks them up by name.
    se_model = Sentiment()
    ner_model = NER()

    # Sample inputs, taken from: https://twitter.com/NFLFantasy
    examples = [
        "Dameon Pierce is clearly the #Texans starter and he once again looks good",
        "Deebo Samuel had 150+ receiving yards in 4 games last year - the most by any receiver in the league.",
    ]

    # One text box in, two label panels out (sentiment, entities)
    demo = gr.Interface(
        fn=classify,
        inputs="text",
        outputs=["label", "label"],
        examples=examples,
    )
    demo.launch()