Spaces:

aus10powell
/

TwitterAccounts

Runtime error

App Files Files Community

TwitterAccounts / scripts /twitter_scraper.py

aus10powell

Upload 74 files

8158335 over 1 year ago

raw

history blame

5.73 kB

	import snscrape.modules.twitter as sntwitter
	import pandas as pd
	import datetime as dt
	from tqdm import tqdm
	import requests


	def get_tweets(query: str) -> list:
	    """
	    Run a Twitter search through snscrape and collect every result as a dictionary.

	    The query is echoed to stdout, then each item yielded by the search
	    scraper is converted with ``extract_tweet_info``. A tqdm progress bar
	    wraps the iteration. No cap is applied here: the scraper's iterator is
	    consumed until it is exhausted.

	    Args:
	        query (str): The search query handed to ``TwitterSearchScraper``.

	    Returns:
	        A list with one extracted-information dictionary per scraped item.
	    """
	    print(f"Fetching tweets with query: {query}")

	    scraper = sntwitter.TwitterSearchScraper(query)
	    collected = []
	    for item in tqdm(scraper.get_items()):
	        collected.append(extract_tweet_info(item))
	    return collected


	def get_replies(username: str, conversation_id: str, max_tweets: int) -> list:
	    """
	    Fetch tweets addressed to a user from a given tweet ID onwards, capped at ``max_tweets``.

	    The search query is ``to:{username} since_id:{conversation_id} filter:safe``.
	    Each scraped item is converted with ``extract_tweet_info``.

	    NOTE(review): the query filters with ``since_id`` rather than a
	    conversation-specific operator, so results are presumably every safe
	    tweet sent to ``username`` after that ID, not only replies inside the
	    conversation -- confirm against the Twitter search operator docs.

	    Args:
	        username (str): The user the tweets are addressed to (``to:`` operator).
	        conversation_id (str): The tweet ID used as the ``since_id`` lower bound.
	        max_tweets (int): Maximum number of tweets to return. Values of zero
	            or below yield an empty list.

	    Returns:
	        A list of at most ``max_tweets`` extracted-information dictionaries.
	    """
	    # Function-scope import so this block does not depend on file-level changes.
	    from itertools import islice

	    print(
	        f"Fetching replies for username {username} and conversation {conversation_id}"
	    )
	    query = f"to:{username} since_id:{conversation_id} filter:safe"

	    scraped = sntwitter.TwitterSearchScraper(query).get_items()
	    # islice stops after exactly max_tweets items. The previous
	    # `if i > max_tweets: break` check let max_tweets + 1 items through.
	    capped = islice(scraped, max(max_tweets, 0))
	    return [extract_tweet_info(tweet) for tweet in tqdm(capped)]


	def get_tweet_by_id_and_username(username: str, tweet_id: str):
	    """
	    Start a snscrape search for a single tweet, addressed by its status URL.

	    The status URL ``https://twitter.com/{username}/status/{tweet_id}`` is
	    built and passed to ``TwitterSearchScraper`` as the search query.

	    Args:
	        username (str): The username of the account that posted the tweet.
	        tweet_id (str): The ID of the tweet.

	    Returns:
	        The object returned by the scraper's ``get_items()``. It is not
	        iterated here, so the caller must consume it to obtain results.
	    """
	    status_url = f"https://twitter.com/{username}/status/{tweet_id}"
	    scraper = sntwitter.TwitterSearchScraper(status_url)
	    return scraper.get_items()


	def extract_tweet_info(tweet):
	    """
	    Flatten a tweet object into a plain dictionary of selected fields.

	    Args:
	        tweet: An object exposing ``date``, ``user.username``, ``rawContent``,
	            ``retweetCount``, ``id``, ``likeCount``, ``replyCount``,
	            ``inReplyToTweetId``, ``conversationId`` and ``viewCount``.

	    Returns:
	        A dictionary with the keys ``date``, ``username``, ``content``,
	        ``retweet_count``, ``tweet_id``, ``like_count``, ``reply_count``,
	        ``in_reply_to_tweet_id``, ``conversation_id`` and ``view_count``,
	        in that order.
	    """
	    info = {}
	    info["date"] = tweet.date
	    info["username"] = tweet.user.username
	    info["content"] = tweet.rawContent
	    info["retweet_count"] = tweet.retweetCount
	    info["tweet_id"] = tweet.id
	    info["like_count"] = tweet.likeCount
	    info["reply_count"] = tweet.replyCount
	    info["in_reply_to_tweet_id"] = tweet.inReplyToTweetId
	    info["conversation_id"] = tweet.conversationId
	    info["view_count"] = tweet.viewCount
	    return info


	def get_follower_ids(username: str, limit: int = 20):
	    """
	    Fetch one tweet by ``username`` from a fixed date window, plus replies sent after it.

	    NOTE: despite its name, this function does not return follower IDs. It
	    searches for tweets from ``username`` between 2023-03-10 and 2023-03-22,
	    takes the last tweet in the returned list, and passes its ID to
	    ``get_replies`` with a cap of 1000.

	    Args:
	        username (str): The Twitter handle whose tweets and replies are fetched.
	        limit (int): Currently unused; kept so existing callers that pass it
	            keep working.

	    Returns:
	        A tuple ``(one_tweet, replies)``: ``one_tweet`` is the extracted
	        information dictionary of the selected tweet and ``replies`` is the
	        list returned by ``get_replies``. If the search finds no tweets,
	        ``(None, [])`` is returned instead of raising ``IndexError``.
	    """
	    # The search window is hard-coded.
	    start_date = dt.date(year=2023, month=3, day=10)
	    end_date = dt.date(year=2023, month=3, day=22)
	    query = f"from:{username} since:{start_date} until:{end_date}"

	    tweets = get_tweets(query=query)
	    if not tweets:
	        # With no tweet to anchor on, tweets[-1] would raise IndexError.
	        return None, []

	    one_tweet = tweets[-1]
	    replies = get_replies(
	        username=username,
	        conversation_id=one_tweet["tweet_id"],
	        max_tweets=1000,
	    )
	    return one_tweet, replies


	def get_twitter_account_info(twitter_handle: str) -> dict:
	    """
	    Look up basic profile data for a Twitter account via snscrape.

	    Args:
	        twitter_handle (str): The Twitter username to look up.

	    Returns:
	        dict: The profile's ``name``, ``username``, ``user_id``,
	        ``follower_count``, ``friends_count`` and ``verified`` values, read
	        from the ``entity`` attribute of a ``TwitterUserScraper``.
	    """
	    profile = sntwitter.TwitterUserScraper(twitter_handle).entity

	    account = {}
	    account["name"] = profile.displayname
	    account["username"] = profile.username
	    account["user_id"] = profile.id
	    account["follower_count"] = profile.followersCount
	    account["friends_count"] = profile.friendsCount
	    account["verified"] = profile.verified
	    return account


	if __name__ == "__main__":
	    # --- Manual check 1: extract tweets from an account ---
	    # The dates bound when the account tweeted; replies are not taken into account.
	    account = "taylorlorenz"
	    start_date = dt.date(year=2023, month=2, day=1)
	    end_date = dt.date(year=2023, month=3, day=11)

	    # Build the search query and show it before scraping.
	    query = f"from:{account} since:{start_date} until:{end_date}"
	    print(f"query: {query}")
	    tweets = get_tweets(query=query)

	    df_tweets = pd.DataFrame(data=tweets).sort_values("in_reply_to_tweet_id")
	    # Writes the sorted tweets to disk; comment this line out to skip saving.
	    df_tweets.to_csv("df_tweets.csv")

	    for preview in (df_tweets.head(2), df_tweets.tail(2)):
	        print(preview)
	    print(f"Total Tweets: {len(tweets)}")

	    # --- Manual check 2: extract conversation threads from a conversation ID ---
	    # A tweet from elon musk about turbulent times
	    conversation_id = 1620650202305798144
	    tweets = get_replies(
	        username="elonmusk", conversation_id=conversation_id, max_tweets=3000
	    )
	    df_replies = pd.DataFrame(data=tweets)

	    # Uncomment to save output
	    # df_replies.to_csv("df_replies.csv")
	    print(
	        f"Number of extracted tweets from conversation_id: {conversation_id}, {len(tweets)}"
	    )