Spaces:

Manasa1
/

Jack_Clone

Sleeping

App Files Files Community

Manasa1 committed on Nov 17, 2024

Commit

2d11b96

verified ·

1 Parent(s): 5028070

Update tweet_analyzer.py

Browse files

Files changed (1) hide show

tweet_analyzer.py +16 -41

tweet_analyzer.py CHANGED Viewed

@@ -8,7 +8,6 @@ from datetime import datetime
 from sklearn.decomposition import NMF
 from sklearn.feature_extraction.text import TfidfVectorizer
 import random
-from transformers import GPT2Tokenizer
 class TweetDatasetProcessor:
     def __init__(self):
@@ -16,7 +15,6 @@ class TweetDatasetProcessor:
         self.groq_client = groq.Groq(api_key=os.getenv('Groq_api'))
         self.tweets = []
         self.personality_profile = {}
-        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')  # Initialize tokenizer
     def extract_text_from_pdf(self, pdf_path):
         """Extract text content from PDF file."""
@@ -74,46 +72,24 @@ class TweetDatasetProcessor:
         """Extract hashtags from tweet."""
         return [word for word in text.split() if word.startswith('#')]
-    def truncate_to_token_limit(self, tweets, max_tokens=6000):
-        """Truncate tweets to fit within token limit."""
-        total_tokens = 0
-        truncated_tweets = []
-        for tweet in tweets:
-            tokens = self.tokenizer.encode(tweet)
-            if total_tokens + len(tokens) > max_tokens:
-                break
-            total_tokens += len(tokens)
-            truncated_tweets.append(tweet)
-        return truncated_tweets
     def analyze_personality(self):
         """Comprehensive personality analysis."""
         all_tweets = [tweet['content'] for tweet in self.tweets]
-        # Truncate tweets to avoid exceeding token limit
-        truncated_tweets = self.truncate_to_token_limit(all_tweets, max_tokens=6000)
-        # Create analysis prompt with truncated tweets
         analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
         Core beliefs, emotional tendencies, cognitive patterns, etc.
         Tweets for analysis:
-        {json.dumps(truncated_tweets, indent=2)}
         """
-        try:
-            response = self.groq_client.chat.completions.create(
-                messages=[
-                    {"role": "system", "content": "You are an expert psychologist."},
-                    {"role": "user", "content": analysis_prompt},
-                ],
-                model="llama-3.1-70b-versatile",
-                temperature=0.1,
-            )
-            self.personality_profile = response.choices[0].message.content
-        except Exception as e:
-            print(f"Error processing personality analysis: {e}")
-            self.personality_profile = {}
         return self.personality_profile
     def analyze_topics(self, n_topics=5):
@@ -156,12 +132,9 @@ class TweetDatasetProcessor:
         {context}
         Additionally, consider these contexts to increase diversity:
         {', '.join(selected_contexts)}
-        Generate a tweet that the author would write, ensuring that the tweet:
-        1. Reflects the author's personality traits, core beliefs, and values.
-        2. Incorporates insights from multiple topics when possible.
-        3. Uses a natural communication style and vocabulary.
-        4. Includes relevant mentions or hashtags if applicable.
-        The tweet should feel diverse and authentic, touching on a variety of topics."""
         try:
             response = self.groq_client.chat.completions.create(
@@ -173,7 +146,9 @@ class TweetDatasetProcessor:
                 temperature=1.0,  # Increased temperature for more diversity
                 max_tokens=150,
             )
-            return response.choices[0].message.content
         except Exception as e:
             print(f"Error generating tweet: {e}")
             return "Error generating tweet"

 from sklearn.decomposition import NMF
 from sklearn.feature_extraction.text import TfidfVectorizer
 import random
 class TweetDatasetProcessor:
     def __init__(self):
         self.groq_client = groq.Groq(api_key=os.getenv('Groq_api'))
         self.tweets = []
         self.personality_profile = {}
     def extract_text_from_pdf(self, pdf_path):
         """Extract text content from PDF file."""
         """Extract hashtags from tweet."""
         return [word for word in text.split() if word.startswith('#')]
     def analyze_personality(self):
         """Comprehensive personality analysis."""
         all_tweets = [tweet['content'] for tweet in self.tweets]
         analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
         Core beliefs, emotional tendencies, cognitive patterns, etc.
         Tweets for analysis:
+        {json.dumps(all_tweets[:30], indent=2)}
         """
+        response = self.groq_client.chat.completions.create(
+            messages=[
+                {"role": "system", "content": "You are an expert psychologist."},
+                {"role": "user", "content": analysis_prompt},
+            ],
+            model="llama-3.1-70b-versatile",
+            temperature=0.1,
+        )
+        self.personality_profile = response.choices[0].message.content
         return self.personality_profile
     def analyze_topics(self, n_topics=5):
         {context}
         Additionally, consider these contexts to increase diversity:
         {', '.join(selected_contexts)}
+        **Only generate the tweet. Do not include analysis, explanation, or any other content.**
+        """
         try:
             response = self.groq_client.chat.completions.create(
                 temperature=1.0,  # Increased temperature for more diversity
                 max_tokens=150,
             )
+            tweet = response.choices[0].message.content
+            # Ensure the response only contains the tweet text, and nothing else.
+            return tweet.strip().split("\n")[0]  # Only return the first line (tweet)
         except Exception as e:
             print(f"Error generating tweet: {e}")
             return "Error generating tweet"