Spaces:
Runtime error
Runtime error
File size: 3,784 Bytes
e03530d 6796bfc 071cb1b f4db971 afb9463 b360479 6796bfc afb9463 3ea613d 19ded2a afb9463 b143835 f4db971 3ea613d f4db971 3ea613d dc1bf57 86f79dd dc2ae70 86f79dd 6796bfc 071cb1b 115ff0b 360bf8a 6796bfc 360bf8a 071cb1b 360bf8a 1cf959e f42cc17 1cf959e b360479 6796bfc 768cc89 793922a 768cc89 e063d7c 6796bfc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import streamlit as st
import pandas as pd
from matplotlib import pyplot as plt
import twint
import nest_asyncio
import multiprocessing.pool
import functools
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax
import csv
import urllib.request
import IPython.display as ipd
# Show a status message in the app before the rest of the script
# (including the model loading further down) runs.
st.write('Loading...')
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in ``text`` with fixed placeholders.

    The text is split on single spaces, so runs of spaces are preserved
    when the tokens are joined back together.

    Args:
        text: The raw tweet text.

    Returns:
        The text with every token that starts with ``@`` (and is longer than
        one character) replaced by ``@user`` and every token that starts
        with ``http`` replaced by ``http``.
    """
    new_text = []
    for t in text.split(" "):
        # A lone "@" is not a mention, so it is left untouched.
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)
# Loading pretrained model
# MODEL is used both as the identifier handed to from_pretrained() and as the
# directory that save_pretrained() writes to.
MODEL = 'cardiffnlp/twitter-roberta-base-sentiment'
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Both objects are loaded before either one is saved; keep this order.
# NOTE(review): presumably, once a local directory named MODEL exists,
# from_pretrained() reads from it instead of downloading, so saving only one
# of the two before loading the other could break the second load - confirm
# against the transformers documentation.
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)
# Func to get a score using the above model
def combined_score(text):
    """Return a single sentiment score for ``text``.

    The text is run through ``preprocess`` and the module-level ``tokenizer``
    and ``model``; the model's three output logits are turned into
    probabilities with softmax.

    Args:
        text: The raw tweet text.

    Returns:
        ``positive - negative`` probability, a value between -1 and 1.
    """
    text = preprocess(text)
    # Truncate so that an unusually long input cannot exceed the model's
    # maximum sequence length (512 tokens for RoBERTa-base checkpoints).
    encoded_input = tokenizer(
        text, return_tensors='pt', truncation=True, max_length=512
    )
    output = model(**encoded_input)
    # output[0] holds the logits; [0] selects the only item in the batch.
    scores = softmax(output[0][0].detach().numpy())
    return -scores[0] + scores[2]  # scores = [negative, neutral, positive]
# https://stackoverflow.com/questions/492519/timeout-on-a-function-call
def timeout(max_timeout):
    """Timeout decorator, parameter in seconds.

    The decorated function runs in a single worker thread while the caller
    waits at most ``max_timeout`` seconds for its result.

    Args:
        max_timeout: Maximum number of seconds to wait for the result.

    Returns:
        A decorator. The wrapped function returns the original function's
        result, re-raises any exception it raised, or raises
        ``multiprocessing.TimeoutError`` when the wait runs out.
    """
    def timeout_decorator(item):
        """Wrap the original function."""
        @functools.wraps(item)
        def func_wrapper(*args, **kwargs):
            """Closure for function."""
            # Leaving the ``with`` block terminates the pool, so a pool is
            # no longer leaked on every call. A worker that is still busy
            # after a timeout is not killed; it finishes in the background.
            with multiprocessing.pool.ThreadPool(processes=1) as pool:
                async_result = pool.apply_async(item, args, kwargs)
                # raises a TimeoutError if execution exceeds max_timeout
                return async_result.get(max_timeout)
        return func_wrapper
    return timeout_decorator
# Getting tweets from a user
@timeout(120.0)
def get_tweets(username, limit=500, save_name=None):
    """Fetch tweets for ``username`` with twint and return them as a dataframe.

    The call is wrapped in ``@timeout(120.0)``, so a TimeoutError is raised
    when the search takes longer than 120 seconds.

    Args:
        username: The Twitter user of interest.
        limit: Max number of tweets to fetch (increments of 20).
        save_name: Optional path; when given, the dataframe is also written
            there as CSV.

    Returns:
        The value of ``twint.storage.panda.Tweets_df`` after the search.
    """
    #nest_asyncio.apply() # Helps avoid RuntimeError: This event loop is already running
    # Setup config
    c = twint.Config()  # Create a config object to store our settings
    c.Limit = limit  # Max number of tweets to fetch (increments of 20)
    c.Username = username  # User of interest
    c.Pandas = True  # Store tweets in a dataframe
    c.Hide_output = True  # Avoid printing out tweets
    # Run the search
    twint.run.Search(c)
    # Get the results and optionally save to a file as well
    df = twint.storage.panda.Tweets_df
    if save_name is not None:
        df.to_csv(save_name)
    return df
# Page layout: a title, then a form collecting the username and tweet count.
title = st.title('Twitter Sentiment Map Thingee')

with st.form("my_form"):
    st.write("Parameters:")
    user = st.text_input("Twitter Username")
    n_tweets = st.slider('How Many Tweets', 20, 2000, 20)

    # Every form must have a submit button.
    submitted = st.form_submit_button("Submit")
    # NOTE(review): nesting of this branch inside the form follows the
    # Streamlit form example; confirm it matches the intended layout.
    if submitted:
        st.write("Fetching user", user, "n_tweets", n_tweets)
        try:
            tweets = get_tweets(user, limit=n_tweets)
        except multiprocessing.TimeoutError:
            # get_tweets is wrapped in @timeout and raises when the fetch
            # takes too long; report it instead of crashing the app.
            st.error("Timed out while fetching tweets. Try fewer tweets.")
            st.stop()

        # Without any results there is no 'tweet' column to score or plot.
        if tweets is None or tweets.empty:
            st.warning("No tweets were returned for this user.")
            st.stop()

        st.write("Resulting dataframe shape:", tweets.shape)
        st.write("Calculating sentiments...")
        tweets['sentiment'] = tweets['tweet'].map(combined_score)
        tweets['tweet_length'] = tweets['tweet'].map(len)
        st.write("Average sentiment:", tweets.sentiment.mean())

        # Two views of the same data: a density hexbin and a raw scatter.
        fig, axs = plt.subplots(1, 2, figsize=(12, 6))
        axs[0].hexbin(tweets['tweet_length'], tweets['sentiment'],
                      gridsize=20, bins=12, cmap='inferno')
        axs[0].set_title('Tweet Sentiment and Length')
        axs[1].scatter(tweets['tweet_length'], tweets['sentiment'])
        axs[1].set_title('Tweet Sentiment vs Length')
        plt.setp(axs[:], xlabel='Tweet Length')
        plt.setp(axs[:], ylabel='Sentiment')
        st.pyplot(fig)