Spaces:
Runtime error
Runtime error
import snscrape.modules.twitter as sntwitter | |
import pandas as pd | |
import datetime as dt | |
from tqdm import tqdm | |
import requests | |
def get_tweets(
    query: str,
) -> list:
    """
    Run a Twitter search through snscrape and collect every result.

    Args:
        query (str): The search query to run on Twitter.

    Returns:
        A list holding the output of ``extract_tweet_info`` for each tweet,
        in the order the scraper yields them.
    """
    print(f"Fetching tweets with query: {query}")
    scraper = sntwitter.TwitterSearchScraper(query)
    collected = []
    # tqdm wraps the scraper's item stream so progress is shown while iterating.
    for item in tqdm(scraper.get_items()):
        collected.append(extract_tweet_info(item))
    return collected
def get_replies(username: str, conversation_id: str, max_tweets: int) -> list:
    """
    Fetch up to ``max_tweets`` tweets addressed to a user, starting from a given tweet ID.

    Args:
        username (str): The username of the Twitter user whose replies are to be fetched.
        conversation_id (str): The tweet ID placed in the ``since_id:`` filter of the search query.
        max_tweets (int): The maximum number of tweets to return. A value of zero
            or less yields an empty list without contacting Twitter.

    Returns:
        A list of extracted tweet information with at most ``max_tweets`` entries.
    """
    print(
        f"Fetching replies for username {username} and conversation {conversation_id}"
    )
    # NOTE(review): this query matches tweets sent *to* the user after the given
    # tweet ID; it does not restrict results to one conversation thread. Confirm
    # whether a conversation-scoped filter is intended.
    query = f"to:{username} since_id:{conversation_id} filter:safe"
    tweets_list = []
    if max_tweets <= 0:
        return tweets_list
    scraped = sntwitter.TwitterSearchScraper(query).get_items()
    for tweet in tqdm(scraped, total=max_tweets):
        tweets_list.append(extract_tweet_info(tweet))
        # Check the cap right after appending so we stop at exactly max_tweets
        # and never pull an extra item from the scraper.
        if len(tweets_list) >= max_tweets:
            break
    return tweets_list
def get_tweet_by_id_and_username(username: str, tweet_id: str):
    """
    Look up a single tweet by its ID.

    Args:
        username (str): The username of the Twitter user who posted the tweet.
            Kept for backward compatibility; the lookup itself only needs the ID.
        tweet_id (str): The ID of the tweet to fetch (a numeric string or an int).

    Returns:
        The lazy generator returned by the scraper's ``get_items()``; iterate it
        (e.g. ``next(...)``) to obtain the tweet object.

    Raises:
        ValueError: If ``tweet_id`` is not numeric.
    """
    # The previous implementation passed the status URL to TwitterSearchScraper,
    # which treats it as a search query and therefore looks for tweets that
    # mention the URL rather than the tweet itself. TwitterTweetScraper fetches
    # the tweet with the given ID directly.
    return sntwitter.TwitterTweetScraper(int(tweet_id)).get_items()
def extract_tweet_info(tweet):
    """
    Flatten the fields of interest from a tweet object into a plain dictionary.

    Args:
        tweet: A tweet object exposing ``date``, ``user.username``, ``rawContent``,
            ``retweetCount``, ``id``, ``likeCount``, ``replyCount``,
            ``inReplyToTweetId``, ``conversationId`` and ``viewCount``.

    Returns:
        A dictionary with snake_case keys holding those values.
    """
    author = tweet.user
    record = {"date": tweet.date, "username": author.username}
    # Keyword order below fixes the key order of the resulting dictionary.
    record.update(
        content=tweet.rawContent,
        retweet_count=tweet.retweetCount,
        tweet_id=tweet.id,
        like_count=tweet.likeCount,
        reply_count=tweet.replyCount,
        in_reply_to_tweet_id=tweet.inReplyToTweetId,
        conversation_id=tweet.conversationId,
        view_count=tweet.viewCount,
    )
    return record
def get_follower_ids(
    username: str,
    limit: int = 20,
    start_date: dt.date = dt.date(year=2023, month=3, day=10),
    end_date: dt.date = dt.date(year=2023, month=3, day=22),
):
    """
    Fetch one tweet posted by a user in a date window, plus tweets sent to that user after it.

    NOTE(review): despite its name, this function does not return follower IDs.
    It returns a tweet and the replies gathered for it; the name is kept so
    existing callers continue to work.

    Args:
        username (str): The Twitter handle whose tweets are searched.
        limit (int): Currently unused; retained for backward compatibility.
        start_date (dt.date): Inclusive ``since:`` date of the search window.
        end_date (dt.date): ``until:`` date of the search window.

    Returns:
        A tuple ``(one_tweet, replies)``: ``one_tweet`` is the last entry returned
        by ``get_tweets`` for the window, and ``replies`` is the list returned by
        ``get_replies`` for that tweet's ID (capped at 1000).

    Raises:
        IndexError: If the user posted no tweets inside the date window.
    """
    query = f"from:{username} since:{start_date} until:{end_date}"
    tweets = get_tweets(query=query)
    if not tweets:
        # Same exception type the bare tweets[-1] would raise, but with context.
        raise IndexError(f"No tweets found for query: {query}")
    one_tweet = tweets[-1]
    one_tweet_id = one_tweet["tweet_id"]
    replies = get_replies(
        username=username, conversation_id=one_tweet_id, max_tweets=1000
    )
    return one_tweet, replies
def get_twitter_account_info(twitter_handle: str) -> dict:
    """
    Look up basic profile details of a Twitter account via snscrape.

    Args:
        twitter_handle (str): The Twitter username to retrieve information for.

    Returns:
        dict: The account's display name, username, user ID, follower count,
        friends count and verified flag.
    """
    # The profile data is read from the user scraper's `entity` attribute.
    profile = sntwitter.TwitterUserScraper(twitter_handle).entity
    return dict(
        name=profile.displayname,
        username=profile.username,
        user_id=profile.id,
        follower_count=profile.followersCount,
        friends_count=profile.friendsCount,
        verified=profile.verified,
    )
if __name__ == "__main__":
    # --- Manual check 1: extract the tweets an account posted in a date window ---
    # The dates bound when the account tweeted; replies from others are not included.
    account = "taylorlorenz"
    start_date = dt.date(year=2023, month=2, day=1)
    end_date = dt.date(year=2023, month=3, day=11)

    # Format the query string
    query = f"from:{account} since:{start_date} until:{end_date}"
    print(f"query: {query}")

    tweets = get_tweets(query=query)
    df_tweets = pd.DataFrame(data=tweets)
    # An empty result produces a DataFrame without columns, and sorting by a
    # missing column raises KeyError, so only sort when there is data.
    if not df_tweets.empty:
        df_tweets = df_tweets.sort_values("in_reply_to_tweet_id")

    # Save the output (comment out to skip writing the file)
    df_tweets.to_csv("df_tweets.csv")
    print(df_tweets.head(2))
    print(df_tweets.tail(2))
    print(f"Total Tweets: {len(tweets)}")

    # --- Manual check 2: extract conversation threads from a conversation ID ---
    conversation_id = (
        1620650202305798144  # A tweet from elon musk about turbulent times
    )
    max_tweets = 3000
    tweets = get_replies(
        username="elonmusk", conversation_id=conversation_id, max_tweets=max_tweets
    )
    df_replies = pd.DataFrame(data=tweets)

    # Uncomment to save output
    # df_replies.to_csv("df_replies.csv")
    print(
        f"Number of extracted tweets from conversation_id: {conversation_id}, {len(tweets)}"
    )