Update tweet_analyzer.py
tweet_analyzer.py  +36 -11

tweet_analyzer.py CHANGED
@@ -78,7 +78,7 @@ class TweetDatasetProcessor:
         analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
         Core beliefs, emotional tendencies, cognitive patterns, etc.
         Tweets for analysis:
-        {json.dumps(all_tweets[:
+        {json.dumps(all_tweets[:5], indent=2)}  # Further reduced number of tweets
         """

         response = self.groq_client.chat.completions.create(
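Note on the first hunk: the new `{json.dumps(all_tweets[:5], indent=2)}` line sits inside the f"""...""" literal opened by analysis_prompt, so the trailing "# Further reduced number of tweets" is not a Python comment; it is sent to the model as literal prompt text. A minimal, self-contained sketch of the gotcha:

import json

all_tweets = ["a", "b", "c", "d", "e", "f"]
analysis_prompt = f"""Tweets for analysis:
{json.dumps(all_tweets[:5], indent=2)}  # Further reduced number of tweets
"""
# The "# ..." above survives into the string: it is prompt text, not a comment.
print(analysis_prompt)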
@@ -92,7 +92,7 @@ class TweetDatasetProcessor:
         self.personality_profile = response.choices[0].message.content
         return self.personality_profile

-    def analyze_topics(self, n_topics=3): #
+    def analyze_topics(self, n_topics=3): # Reduced the number of topics
         """Extract and identify different topics the author has tweeted about."""
         all_tweets = [tweet['content'] for tweet in self.tweets]
         vectorizer = TfidfVectorizer(stop_words='english')
@@ -109,9 +109,16 @@ class TweetDatasetProcessor:
         topics = list(set(topics))
         return topics

+    def count_tokens(self, text):
+        """Estimate the number of tokens in the given text."""
+        # A basic token count estimation (approximate)
+        return len(text.split())
+
     def generate_tweet(self, context=""):
         """Generate a new tweet based on personality profile and optional context."""
-        additional_contexts = [
+        # Extract historical topics and add them to additional contexts
+        historical_topics = self.analyze_topics(n_topics=3)  # Reduced number of topics
+        additional_contexts = historical_topics + [
             "Comment on a recent technological advancement.",
             "Share a motivational thought.",
             "Discuss a current trending topic.",
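Note on the new count_tokens(): len(text.split()) counts whitespace-separated words, which usually undercounts the model's subword tokens. If a tighter estimate is ever needed, a BPE tokenizer gives a closer bound; the sketch below assumes the third-party tiktoken package (not a dependency of this Space, and its cl100k_base encoding only approximates Llama's tokenizer):

import tiktoken

def count_tokens_bpe(text, encoding_name="cl100k_base"):
    """Count tokens with a BPE tokenizer instead of str.split()."""
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(text))

sample = "Only generate the tweet. Do not include analysis."
print(len(sample.split()))       # whitespace heuristic: 8 "words"
print(count_tokens_bpe(sample))  # BPE token count, typically higher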
@@ -119,35 +126,52 @@ class TweetDatasetProcessor:
             "Provide advice to followers."
         ]

-        # Extract historical topics and add them to additional contexts
-        historical_topics = self.analyze_topics(n_topics=3)  # Reduced number of topics
-        additional_contexts.extend(historical_topics)
-
         # Randomly select multiple contexts to increase diversity
         selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))

         # Randomly sample tweets across different time periods to avoid repetition of topics
-        tweet_sample = random.sample(self.tweets, min(
+        tweet_sample = random.sample(self.tweets, min(5, len(self.tweets)))  # Further reduced number of tweets
         all_tweets = [tweet['content'] for tweet in tweet_sample]

         # If personality profile is too long, truncate it (adjust length as needed)
-        personality_profile_excerpt = self.personality_profile[:
+        personality_profile_excerpt = self.personality_profile[:400]  # Further truncation

-
+        # Combine everything and check token count
+        prompt = f"""Based on this personality profile:
         {personality_profile_excerpt}
         Current context or topic (if any):
         {context}
         Additionally, consider these contexts to increase diversity:
         {', '.join(selected_contexts)}

+        Tweets for context:
+        {', '.join(all_tweets)}
+
         **Only generate the tweet. Do not include analysis, explanation, or any other content.**
         """

+        token_count = self.count_tokens(prompt)
+        if token_count > 6000:  # Limit to 6000 tokens (adjust as needed)
+            # Further truncate the tweet and topics if token limit is exceeded
+            all_tweets = all_tweets[:3]  # Reduce the number of tweets used
+            prompt = f"""Based on this personality profile:
+            {personality_profile_excerpt}
+            Current context or topic (if any):
+            {context}
+            Additionally, consider these contexts to increase diversity:
+            {', '.join(selected_contexts)}
+
+            Tweets for context:
+            {', '.join(all_tweets)}
+
+            **Only generate the tweet. Do not include analysis, explanation, or any other content.**
+            """
+
         try:
             response = self.groq_client.chat.completions.create(
                 messages=[
                     {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
-                    {"role": "user", "content":
+                    {"role": "user", "content": prompt},
                 ],
                 model="llama-3.1-70b-versatile",
                 temperature=1.0,  # Increased temperature for more diversity
@@ -159,3 +183,4 @@ class TweetDatasetProcessor:
         except Exception as e:
             print(f"Error generating tweet: {e}")
             return "Error generating tweet"
+
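One possible follow-up (a sketch only, with hypothetical names): generate_tweet() now builds the same f-string twice, once normally and once after truncation. A small builder would remove the duplication; the stand-in inputs below mirror what generate_tweet() computes:

def build_prompt(profile_excerpt, context, selected_contexts, tweets):
    """Assemble the generation prompt from its parts."""
    return f"""Based on this personality profile:
{profile_excerpt}
Current context or topic (if any):
{context}
Additionally, consider these contexts to increase diversity:
{', '.join(selected_contexts)}

Tweets for context:
{', '.join(tweets)}

**Only generate the tweet. Do not include analysis, explanation, or any other content.**
"""

def count_tokens(text):
    return len(text.split())  # same heuristic as the commit

# Stand-in inputs, mirroring what generate_tweet() computes
excerpt, ctx = "profile...", ""
contexts = ["Share a motivational thought."]
tweets = ["tweet one", "tweet two", "tweet three", "tweet four"]

prompt = build_prompt(excerpt, ctx, contexts, tweets)
if count_tokens(prompt) > 6000:   # same 6000-token budget as the commit
    tweets = tweets[:3]           # drop tweets first; they dominate prompt length
    prompt = build_prompt(excerpt, ctx, contexts, tweets)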