Manasa1 committed on
Commit
10f6a71
·
verified ·
1 Parent(s): 52b9e07

Update tweet_analyzer.py

Browse files
Files changed (1) hide show
  1. tweet_analyzer.py +5 -5
tweet_analyzer.py CHANGED
@@ -78,7 +78,7 @@ class TweetDatasetProcessor:
78
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
79
  Core beliefs, emotional tendencies, cognitive patterns, etc.
80
  Tweets for analysis:
81
- {json.dumps(all_tweets[:30], indent=2)}
82
  """
83
 
84
  response = self.groq_client.chat.completions.create(
@@ -92,7 +92,7 @@ class TweetDatasetProcessor:
92
  self.personality_profile = response.choices[0].message.content
93
  return self.personality_profile
94
 
95
- def analyze_topics(self, n_topics=5):
96
  """Extract and identify different topics the author has tweeted about."""
97
  all_tweets = [tweet['content'] for tweet in self.tweets]
98
  vectorizer = TfidfVectorizer(stop_words='english')
@@ -120,18 +120,18 @@ class TweetDatasetProcessor:
120
  ]
121
 
122
  # Extract historical topics and add them to additional contexts
123
- historical_topics = self.analyze_topics(n_topics=10) # Consider more topics for greater diversity
124
  additional_contexts.extend(historical_topics)
125
 
126
  # Randomly select multiple contexts to increase diversity
127
  selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
128
 
129
  # Randomly sample tweets across different time periods to avoid repetition of topics
130
- tweet_sample = random.sample(self.tweets, min(30, len(self.tweets))) # Increase sample size for diversity
131
  all_tweets = [tweet['content'] for tweet in tweet_sample]
132
 
133
  # If personality profile is too long, truncate it (adjust length as needed)
134
- personality_profile_excerpt = self.personality_profile[:1000] # Truncate profile to first 1000 characters
135
 
136
  generation_prompt = f"""Based on this personality profile:
137
  {personality_profile_excerpt}
 
78
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
79
  Core beliefs, emotional tendencies, cognitive patterns, etc.
80
  Tweets for analysis:
81
+ {json.dumps(all_tweets[:20], indent=2)} # Reduce the number of tweets analyzed
82
  """
83
 
84
  response = self.groq_client.chat.completions.create(
 
92
  self.personality_profile = response.choices[0].message.content
93
  return self.personality_profile
94
 
95
+ def analyze_topics(self, n_topics=3): # Reduce the number of topics
96
  """Extract and identify different topics the author has tweeted about."""
97
  all_tweets = [tweet['content'] for tweet in self.tweets]
98
  vectorizer = TfidfVectorizer(stop_words='english')
 
120
  ]
121
 
122
  # Extract historical topics and add them to additional contexts
123
+ historical_topics = self.analyze_topics(n_topics=3) # Reduced number of topics
124
  additional_contexts.extend(historical_topics)
125
 
126
  # Randomly select multiple contexts to increase diversity
127
  selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
128
 
129
  # Randomly sample tweets across different time periods to avoid repetition of topics
130
+ tweet_sample = random.sample(self.tweets, min(20, len(self.tweets))) # Reduce the number of tweets sampled
131
  all_tweets = [tweet['content'] for tweet in tweet_sample]
132
 
133
  # If personality profile is too long, truncate it (adjust length as needed)
134
+ personality_profile_excerpt = self.personality_profile[:500] # Truncate further
135
 
136
  generation_prompt = f"""Based on this personality profile:
137
  {personality_profile_excerpt}