Manasa1 committed on
Commit
60b3b65
·
verified ·
1 Parent(s): 10f6a71

Update tweet_analyzer.py

Browse files
Files changed (1) hide show
  1. tweet_analyzer.py +36 -11
tweet_analyzer.py CHANGED
@@ -78,7 +78,7 @@ class TweetDatasetProcessor:
78
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
79
  Core beliefs, emotional tendencies, cognitive patterns, etc.
80
  Tweets for analysis:
81
- {json.dumps(all_tweets[:20], indent=2)} # Reduce the number of tweets analyzed
82
  """
83
 
84
  response = self.groq_client.chat.completions.create(
@@ -92,7 +92,7 @@ class TweetDatasetProcessor:
92
  self.personality_profile = response.choices[0].message.content
93
  return self.personality_profile
94
 
95
- def analyze_topics(self, n_topics=3): # Reduce the number of topics
96
  """Extract and identify different topics the author has tweeted about."""
97
  all_tweets = [tweet['content'] for tweet in self.tweets]
98
  vectorizer = TfidfVectorizer(stop_words='english')
@@ -109,9 +109,16 @@ class TweetDatasetProcessor:
109
  topics = list(set(topics))
110
  return topics
111
 
 
 
 
 
 
112
  def generate_tweet(self, context=""):
113
  """Generate a new tweet based on personality profile and optional context."""
114
- additional_contexts = [
 
 
115
  "Comment on a recent technological advancement.",
116
  "Share a motivational thought.",
117
  "Discuss a current trending topic.",
@@ -119,35 +126,52 @@ class TweetDatasetProcessor:
119
  "Provide advice to followers."
120
  ]
121
 
122
- # Extract historical topics and add them to additional contexts
123
- historical_topics = self.analyze_topics(n_topics=3) # Reduced number of topics
124
- additional_contexts.extend(historical_topics)
125
-
126
  # Randomly select multiple contexts to increase diversity
127
  selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
128
 
129
  # Randomly sample tweets across different time periods to avoid repetition of topics
130
- tweet_sample = random.sample(self.tweets, min(20, len(self.tweets))) # Reduce the number of tweets sampled
131
  all_tweets = [tweet['content'] for tweet in tweet_sample]
132
 
133
  # If personality profile is too long, truncate it (adjust length as needed)
134
- personality_profile_excerpt = self.personality_profile[:500] # Truncate further
135
 
136
- generation_prompt = f"""Based on this personality profile:
 
137
  {personality_profile_excerpt}
138
  Current context or topic (if any):
139
  {context}
140
  Additionally, consider these contexts to increase diversity:
141
  {', '.join(selected_contexts)}
142
 
 
 
 
143
  **Only generate the tweet. Do not include analysis, explanation, or any other content.**
144
  """
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  try:
147
  response = self.groq_client.chat.completions.create(
148
  messages=[
149
  {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
150
- {"role": "user", "content": generation_prompt},
151
  ],
152
  model="llama-3.1-70b-versatile",
153
  temperature=1.0, # Increased temperature for more diversity
@@ -159,3 +183,4 @@ class TweetDatasetProcessor:
159
  except Exception as e:
160
  print(f"Error generating tweet: {e}")
161
  return "Error generating tweet"
 
 
78
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
79
  Core beliefs, emotional tendencies, cognitive patterns, etc.
80
  Tweets for analysis:
81
+ {json.dumps(all_tweets[:5], indent=2)} # Further reduced number of tweets
82
  """
83
 
84
  response = self.groq_client.chat.completions.create(
 
92
  self.personality_profile = response.choices[0].message.content
93
  return self.personality_profile
94
 
95
+ def analyze_topics(self, n_topics=3): # Reduced the number of topics
96
  """Extract and identify different topics the author has tweeted about."""
97
  all_tweets = [tweet['content'] for tweet in self.tweets]
98
  vectorizer = TfidfVectorizer(stop_words='english')
 
109
  topics = list(set(topics))
110
  return topics
111
 
112
+ def count_tokens(self, text):
113
+ """Estimate the number of tokens in the given text."""
114
+ # A basic token count estimation (approximate)
115
+ return len(text.split())
116
+
117
  def generate_tweet(self, context=""):
118
  """Generate a new tweet based on personality profile and optional context."""
119
+ # Extract historical topics and add them to additional contexts
120
+ historical_topics = self.analyze_topics(n_topics=3) # Reduced number of topics
121
+ additional_contexts = historical_topics + [
122
  "Comment on a recent technological advancement.",
123
  "Share a motivational thought.",
124
  "Discuss a current trending topic.",
 
126
  "Provide advice to followers."
127
  ]
128
 
 
 
 
 
129
  # Randomly select multiple contexts to increase diversity
130
  selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
131
 
132
  # Randomly sample tweets across different time periods to avoid repetition of topics
133
+ tweet_sample = random.sample(self.tweets, min(5, len(self.tweets))) # Further reduced number of tweets
134
  all_tweets = [tweet['content'] for tweet in tweet_sample]
135
 
136
  # If personality profile is too long, truncate it (adjust length as needed)
137
+ personality_profile_excerpt = self.personality_profile[:400] # Further truncation
138
 
139
+ # Combine everything and check token count
140
+ prompt = f"""Based on this personality profile:
141
  {personality_profile_excerpt}
142
  Current context or topic (if any):
143
  {context}
144
  Additionally, consider these contexts to increase diversity:
145
  {', '.join(selected_contexts)}
146
 
147
+ Tweets for context:
148
+ {', '.join(all_tweets)}
149
+
150
  **Only generate the tweet. Do not include analysis, explanation, or any other content.**
151
  """
152
 
153
+ token_count = self.count_tokens(prompt)
154
+ if token_count > 6000: # Limit to 6000 tokens (adjust as needed)
155
+ # Further truncate the tweet and topics if token limit is exceeded
156
+ all_tweets = all_tweets[:3] # Reduce the number of tweets used
157
+ prompt = f"""Based on this personality profile:
158
+ {personality_profile_excerpt}
159
+ Current context or topic (if any):
160
+ {context}
161
+ Additionally, consider these contexts to increase diversity:
162
+ {', '.join(selected_contexts)}
163
+
164
+ Tweets for context:
165
+ {', '.join(all_tweets)}
166
+
167
+ **Only generate the tweet. Do not include analysis, explanation, or any other content.**
168
+ """
169
+
170
  try:
171
  response = self.groq_client.chat.completions.create(
172
  messages=[
173
  {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
174
+ {"role": "user", "content": prompt},
175
  ],
176
  model="llama-3.1-70b-versatile",
177
  temperature=1.0, # Increased temperature for more diversity
 
183
  except Exception as e:
184
  print(f"Error generating tweet: {e}")
185
  return "Error generating tweet"
186
+