Spaces:
Sleeping
Sleeping
Update tweet_analyzer.py
Browse files
- tweet_analyzer.py +5 -5
tweet_analyzer.py
CHANGED
@@ -78,7 +78,7 @@ class TweetDatasetProcessor:
|
|
78 |
analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
|
79 |
Core beliefs, emotional tendencies, cognitive patterns, etc.
|
80 |
Tweets for analysis:
|
81 |
-
{json.dumps(all_tweets[:
|
82 |
"""
|
83 |
|
84 |
response = self.groq_client.chat.completions.create(
|
@@ -92,7 +92,7 @@ class TweetDatasetProcessor:
|
|
92 |
self.personality_profile = response.choices[0].message.content
|
93 |
return self.personality_profile
|
94 |
|
95 |
-
def analyze_topics(self, n_topics=
|
96 |
"""Extract and identify different topics the author has tweeted about."""
|
97 |
all_tweets = [tweet['content'] for tweet in self.tweets]
|
98 |
vectorizer = TfidfVectorizer(stop_words='english')
|
@@ -120,18 +120,18 @@ class TweetDatasetProcessor:
|
|
120 |
]
|
121 |
|
122 |
# Extract historical topics and add them to additional contexts
|
123 |
-
historical_topics = self.analyze_topics(n_topics=
|
124 |
additional_contexts.extend(historical_topics)
|
125 |
|
126 |
# Randomly select multiple contexts to increase diversity
|
127 |
selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
|
128 |
|
129 |
# Randomly sample tweets across different time periods to avoid repetition of topics
|
130 |
-
tweet_sample = random.sample(self.tweets, min(
|
131 |
all_tweets = [tweet['content'] for tweet in tweet_sample]
|
132 |
|
133 |
# If personality profile is too long, truncate it (adjust length as needed)
|
134 |
-
personality_profile_excerpt = self.personality_profile[:
|
135 |
|
136 |
generation_prompt = f"""Based on this personality profile:
|
137 |
{personality_profile_excerpt}
|
|
|
78 |
analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
|
79 |
Core beliefs, emotional tendencies, cognitive patterns, etc.
|
80 |
Tweets for analysis:
|
81 |
+
{json.dumps(all_tweets[:20], indent=2)} # Reduce the number of tweets analyzed
|
82 |
"""
|
83 |
|
84 |
response = self.groq_client.chat.completions.create(
|
|
|
92 |
self.personality_profile = response.choices[0].message.content
|
93 |
return self.personality_profile
|
94 |
|
95 |
+
def analyze_topics(self, n_topics=3): # Reduce the number of topics
|
96 |
"""Extract and identify different topics the author has tweeted about."""
|
97 |
all_tweets = [tweet['content'] for tweet in self.tweets]
|
98 |
vectorizer = TfidfVectorizer(stop_words='english')
|
|
|
120 |
]
|
121 |
|
122 |
# Extract historical topics and add them to additional contexts
|
123 |
+
historical_topics = self.analyze_topics(n_topics=3) # Reduced number of topics
|
124 |
additional_contexts.extend(historical_topics)
|
125 |
|
126 |
# Randomly select multiple contexts to increase diversity
|
127 |
selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
|
128 |
|
129 |
# Randomly sample tweets across different time periods to avoid repetition of topics
|
130 |
+
tweet_sample = random.sample(self.tweets, min(20, len(self.tweets))) # Reduce the number of tweets sampled
|
131 |
all_tweets = [tweet['content'] for tweet in tweet_sample]
|
132 |
|
133 |
# If personality profile is too long, truncate it (adjust length as needed)
|
134 |
+
personality_profile_excerpt = self.personality_profile[:500] # Truncate further
|
135 |
|
136 |
generation_prompt = f"""Based on this personality profile:
|
137 |
{personality_profile_excerpt}
|