Manasa1 committed
Commit 4c87df2 · verified · 1 Parent(s): f85caf0

Update tweet_analyzer.py

Files changed (1)
  1. tweet_analyzer.py +49 -22
tweet_analyzer.py CHANGED
@@ -1,4 +1,4 @@
- import os
+ import os
  from PyPDF2 import PdfReader
  import pandas as pd
  from dotenv import load_dotenv
@@ -8,6 +8,7 @@ from datetime import datetime
  from sklearn.decomposition import NMF
  from sklearn.feature_extraction.text import TfidfVectorizer
  import random
+ from transformers import GPT2Tokenizer

  class TweetDatasetProcessor:
      def __init__(self):
@@ -15,6 +16,7 @@ class TweetDatasetProcessor:
          self.groq_client = groq.Groq(api_key=os.getenv('Groq_api'))
          self.tweets = []
          self.personality_profile = {}
+         self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')  # Initialize tokenizer

      def extract_text_from_pdf(self, pdf_path):
          """Extract text content from PDF file."""
@@ -72,24 +74,46 @@ class TweetDatasetProcessor:
          """Extract hashtags from tweet."""
          return [word for word in text.split() if word.startswith('#')]

+     def truncate_to_token_limit(self, tweets, max_tokens=6000):
+         """Truncate tweets to fit within token limit."""
+         total_tokens = 0
+         truncated_tweets = []
+         for tweet in tweets:
+             tokens = self.tokenizer.encode(tweet)
+             if total_tokens + len(tokens) > max_tokens:
+                 break
+             total_tokens += len(tokens)
+             truncated_tweets.append(tweet)
+         return truncated_tweets
+
      def analyze_personality(self):
          """Comprehensive personality analysis."""
          all_tweets = [tweet['content'] for tweet in self.tweets]
+
+         # Truncate tweets to avoid exceeding token limit
+         truncated_tweets = self.truncate_to_token_limit(all_tweets, max_tokens=6000)
+
+         # Create analysis prompt with truncated tweets
          analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
          Core beliefs, emotional tendencies, cognitive patterns, etc.
          Tweets for analysis:
-         {json.dumps(all_tweets[:30], indent=2)}
+         {json.dumps(truncated_tweets, indent=2)}
          """

-         response = self.groq_client.chat.completions.create(
-             messages=[
-                 {"role": "system", "content": "You are an expert psychologist."},
-                 {"role": "user", "content": analysis_prompt},
-             ],
-             model="llama-3.1-70b-versatile",
-             temperature=0.1,
-         )
-         self.personality_profile = response.choices[0].message.content
+         try:
+             response = self.groq_client.chat.completions.create(
+                 messages=[
+                     {"role": "system", "content": "You are an expert psychologist."},
+                     {"role": "user", "content": analysis_prompt},
+                 ],
+                 model="llama-3.1-70b-versatile",
+                 temperature=0.1,
+             )
+             self.personality_profile = response.choices[0].message.content
+         except Exception as e:
+             print(f"Error processing personality analysis: {e}")
+             self.personality_profile = {}
+
          return self.personality_profile

      def analyze_topics(self, n_topics=5):
@@ -139,14 +163,17 @@ class TweetDatasetProcessor:
          4. Includes relevant mentions or hashtags if applicable.
          The tweet should feel diverse and authentic, touching on a variety of topics."""

-         response = self.groq_client.chat.completions.create(
-             messages=[
-                 {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
-                 {"role": "user", "content": generation_prompt},
-             ],
-             model="llama-3.1-70b-versatile",
-             temperature=1.0,  # Increased temperature for more diversity
-             max_tokens=150,
-         )
-
-         return response.choices[0].message.content
+         try:
+             response = self.groq_client.chat.completions.create(
+                 messages=[
+                     {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
+                     {"role": "user", "content": generation_prompt},
+                 ],
+                 model="llama-3.1-70b-versatile",
+                 temperature=1.0,  # Increased temperature for more diversity
+                 max_tokens=150,
+             )
+             return response.choices[0].message.content
+         except Exception as e:
+             print(f"Error generating tweet: {e}")
+             return "Error generating tweet"