Update tweet_analyzer.py
tweet_analyzer.py  +36 -11

tweet_analyzer.py CHANGED
@@ -78,7 +78,7 @@ class TweetDatasetProcessor:
         analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
         Core beliefs, emotional tendencies, cognitive patterns, etc.
         Tweets for analysis:
-        {json.dumps(all_tweets[:
+        {json.dumps(all_tweets[:5], indent=2)}  # Further reduced number of tweets
         """

         response = self.groq_client.chat.completions.create(
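Note on the first hunk: the new `{json.dumps(all_tweets[:5], indent=2)}` line sits inside the f"""...""" literal opened by analysis_prompt, so the trailing "# Further reduced number of tweets" is not a Python comment; it is sent to the model as literal prompt text. A minimal, self-contained sketch of the gotcha:

import json

all_tweets = ["a", "b", "c", "d", "e", "f"]
analysis_prompt = f"""Tweets for analysis:
{json.dumps(all_tweets[:5], indent=2)}  # Further reduced number of tweets
"""
# The "# ..." above survives into the string: it is prompt text, not a comment.
print(analysis_prompt)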
@@ -92,7 +92,7 @@ class TweetDatasetProcessor:
         self.personality_profile = response.choices[0].message.content
         return self.personality_profile

-    def analyze_topics(self, n_topics=3): #
+    def analyze_topics(self, n_topics=3): # Reduced the number of topics
         """Extract and identify different topics the author has tweeted about."""
         all_tweets = [tweet['content'] for tweet in self.tweets]
         vectorizer = TfidfVectorizer(stop_words='english')
@@ -109,9 +109,16 @@ class TweetDatasetProcessor:
         topics = list(set(topics))
         return topics

+    def count_tokens(self, text):
+        """Estimate the number of tokens in the given text."""
+        # A basic token count estimation (approximate)
+        return len(text.split())
+
     def generate_tweet(self, context=""):
         """Generate a new tweet based on personality profile and optional context."""
-        additional_contexts = [
+        # Extract historical topics and add them to additional contexts
+        historical_topics = self.analyze_topics(n_topics=3)  # Reduced number of topics
+        additional_contexts = historical_topics + [
             "Comment on a recent technological advancement.",
             "Share a motivational thought.",
             "Discuss a current trending topic.",
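Note on the new count_tokens(): len(text.split()) counts whitespace-separated words, which usually undercounts the model's subword tokens. If a tighter estimate is ever needed, a BPE tokenizer gives a closer bound; the sketch below assumes the third-party tiktoken package (not a dependency of this Space, and its cl100k_base encoding only approximates Llama's tokenizer):

import tiktoken

def count_tokens_bpe(text, encoding_name="cl100k_base"):
    """Count tokens with a BPE tokenizer instead of str.split()."""
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(text))

sample = "Only generate the tweet. Do not include analysis."
print(len(sample.split()))       # whitespace heuristic: 8 "words"
print(count_tokens_bpe(sample))  # BPE token count, typically higher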
@@ -119,35 +126,52 @@ class TweetDatasetProcessor:
             "Provide advice to followers."
         ]

-        # Extract historical topics and add them to additional contexts
-        historical_topics = self.analyze_topics(n_topics=3)  # Reduced number of topics
-        additional_contexts.extend(historical_topics)
-
         # Randomly select multiple contexts to increase diversity
         selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))

         # Randomly sample tweets across different time periods to avoid repetition of topics
-        tweet_sample = random.sample(self.tweets, min(
+        tweet_sample = random.sample(self.tweets, min(5, len(self.tweets)))  # Further reduced number of tweets
         all_tweets = [tweet['content'] for tweet in tweet_sample]

         # If personality profile is too long, truncate it (adjust length as needed)
-        personality_profile_excerpt = self.personality_profile[:
+        personality_profile_excerpt = self.personality_profile[:400]  # Further truncation

-
+        # Combine everything and check token count
+        prompt = f"""Based on this personality profile:
         {personality_profile_excerpt}
         Current context or topic (if any):
         {context}
         Additionally, consider these contexts to increase diversity:
         {', '.join(selected_contexts)}

+        Tweets for context:
+        {', '.join(all_tweets)}
+
         **Only generate the tweet. Do not include analysis, explanation, or any other content.**
         """

+        token_count = self.count_tokens(prompt)
+        if token_count > 6000:  # Limit to 6000 tokens (adjust as needed)
+            # Further truncate the tweet and topics if token limit is exceeded
+            all_tweets = all_tweets[:3]  # Reduce the number of tweets used
+            prompt = f"""Based on this personality profile:
+            {personality_profile_excerpt}
+            Current context or topic (if any):
+            {context}
+            Additionally, consider these contexts to increase diversity:
+            {', '.join(selected_contexts)}
+
+            Tweets for context:
+            {', '.join(all_tweets)}
+
+            **Only generate the tweet. Do not include analysis, explanation, or any other content.**
+            """
+
         try:
             response = self.groq_client.chat.completions.create(
                 messages=[
                     {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
-                    {"role": "user", "content":
+                    {"role": "user", "content": prompt},
                 ],
                 model="llama-3.1-70b-versatile",
                 temperature=1.0,  # Increased temperature for more diversity
@@ -159,3 +183,4 @@ class TweetDatasetProcessor:
         except Exception as e:
             print(f"Error generating tweet: {e}")
             return "Error generating tweet"
+
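One possible follow-up (a sketch only, with hypothetical names): generate_tweet() now builds the same f-string twice, once normally and once after truncation. A small builder would remove the duplication; the stand-in inputs below mirror what generate_tweet() computes:

def build_prompt(profile_excerpt, context, selected_contexts, tweets):
    """Assemble the generation prompt from its parts."""
    return f"""Based on this personality profile:
{profile_excerpt}
Current context or topic (if any):
{context}
Additionally, consider these contexts to increase diversity:
{', '.join(selected_contexts)}

Tweets for context:
{', '.join(tweets)}

**Only generate the tweet. Do not include analysis, explanation, or any other content.**
"""

def count_tokens(text):
    return len(text.split())  # same heuristic as the commit

# Stand-in inputs, mirroring what generate_tweet() computes
excerpt, ctx = "profile...", ""
contexts = ["Share a motivational thought."]
tweets = ["tweet one", "tweet two", "tweet three", "tweet four"]

prompt = build_prompt(excerpt, ctx, contexts, tweets)
if count_tokens(prompt) > 6000:   # same 6000-token budget as the commit
    tweets = tweets[:3]           # drop tweets first; they dominate prompt length
    prompt = build_prompt(excerpt, ctx, contexts, tweets)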