aus10powell committed on
Commit
c8433b9
1 Parent(s): c34bc94

Update scripts/twitter_scraper.py

Browse files
Files changed (1) hide show
  1. scripts/twitter_scraper.py +25 -8
scripts/twitter_scraper.py CHANGED
@@ -9,6 +9,8 @@ import tweepy
9
  import configparser
10
  import os
11
  import pandas as pd
 
 
12
 
13
  def get_latest_account_tweets(handle):
14
  try:
@@ -16,13 +18,13 @@ def get_latest_account_tweets(handle):
16
  config = configparser.ConfigParser()
17
  config.read("tweepy_auth.ini")
18
  # Get the authentication details
19
- authentication_section = config['AUTHENTICATION']
20
  consumer_key = authentication_section["twitter_consumer_key"]
21
  consumer_secret = authentication_section["twitter_consumer_secret"]
22
  access_token = authentication_section["twitter_access_token"]
23
  access_token_secret = authentication_section["twitter_access_token_secret"]
24
  else:
25
- consumer_key = os.environ['twitter_consumer_key']
26
  consumer_secret = os.environ["twitter_consumer_secret"]
27
  access_token = os.environ["twitter_access_token"]
28
  access_token_secret = os.environ["twitter_access_token_secret"]
@@ -50,7 +52,7 @@ def get_latest_account_tweets(handle):
50
  df_tweets["handle"] = df_tweets.user.iloc[0]["screen_name"]
51
 
52
  return df_tweets
53
-
54
  except tweepy.TweepError as e:
55
  # Handle specific error conditions
56
  if e.api_code == 63:
@@ -60,7 +62,7 @@ def get_latest_account_tweets(handle):
60
  else:
61
  print("Error occurred during API call:", str(e))
62
  return str(e)
63
-
64
  except Exception as e:
65
  print("An error occurred:", str(e))
66
  return str(e)
@@ -68,8 +70,8 @@ def get_latest_account_tweets(handle):
68
 
69
 
70
  def get_tweets(
71
- query: str,
72
- ) -> list:
73
  """
74
  Fetches tweets from Twitter based on a given query and returns a list of extracted tweet information.
75
 
@@ -79,10 +81,25 @@ def get_tweets(
79
  Returns:
80
  A list of extracted tweet information.
81
  """
82
- print(f"Fetching tweets with query: {query}")
 
 
 
 
 
 
 
83
 
84
  fetched_tweets = sntwitter.TwitterSearchScraper(query).get_items()
85
- return [extract_tweet_info(tweet) for tweet in tqdm(fetched_tweets)]
 
 
 
 
 
 
 
 
86
 
87
 
88
  def get_replies(username: str, conversation_id: str, max_tweets: int) -> list:
 
9
  import configparser
10
  import os
11
  import pandas as pd
12
+ from datetime import datetime, date, timedelta
13
+
14
 
15
  def get_latest_account_tweets(handle):
16
  try:
 
18
  config = configparser.ConfigParser()
19
  config.read("tweepy_auth.ini")
20
  # Get the authentication details
21
+ authentication_section = config["AUTHENTICATION"]
22
  consumer_key = authentication_section["twitter_consumer_key"]
23
  consumer_secret = authentication_section["twitter_consumer_secret"]
24
  access_token = authentication_section["twitter_access_token"]
25
  access_token_secret = authentication_section["twitter_access_token_secret"]
26
  else:
27
+ consumer_key = os.environ["twitter_consumer_key"]
28
  consumer_secret = os.environ["twitter_consumer_secret"]
29
  access_token = os.environ["twitter_access_token"]
30
  access_token_secret = os.environ["twitter_access_token_secret"]
 
52
  df_tweets["handle"] = df_tweets.user.iloc[0]["screen_name"]
53
 
54
  return df_tweets
55
+
56
  except tweepy.TweepError as e:
57
  # Handle specific error conditions
58
  if e.api_code == 63:
 
62
  else:
63
  print("Error occurred during API call:", str(e))
64
  return str(e)
65
+
66
  except Exception as e:
67
  print("An error occurred:", str(e))
68
  return str(e)
 
70
 
71
 
72
  def get_tweets(
73
+ handle: str,
74
+ ):
75
  """
76
  Fetches tweets from Twitter based on a given query and returns a list of extracted tweet information.
77
 
 
81
  Returns:
82
  A list of extracted tweet information.
83
  """
84
+ # Get the current date
85
+ today = datetime.today()
86
+ two_months_ago = today - timedelta(days=2 * 30)
87
+
88
+ start_date = two_months_ago.strftime("%Y-%m-%d")
89
+ end_date = today.strftime("%Y-%m-%d")
90
+
91
+ query = f"from:{handle} since:{start_date} until:{end_date} -filter:replies -filter:retweets"
92
 
93
  fetched_tweets = sntwitter.TwitterSearchScraper(query).get_items()
94
+ tweets = [extract_tweet_info(tweet) for tweet in tqdm(fetched_tweets)]
95
+ df_tweets = pd.DataFrame(tweets)
96
+ df_tweets["full_text"] = df_tweets["content"]
97
+ df_tweets["clean_text"] = df_tweets["full_text"].apply(
98
+ lambda r: sentiment.tweet_cleaner(r)
99
+ )
100
+ df_tweets["handle"] = df_tweets["username"]
101
+ df_tweets["created_at"] = df_tweets["date"]
102
+ return df_tweets
103
 
104
 
105
  def get_replies(username: str, conversation_id: str, max_tweets: int) -> list: