Manasa1 committed on
Commit
2d11b96
·
verified ·
1 Parent(s): 5028070

Update tweet_analyzer.py

Browse files
Files changed (1) hide show
  1. tweet_analyzer.py +16 -41
tweet_analyzer.py CHANGED
@@ -8,7 +8,6 @@ from datetime import datetime
8
  from sklearn.decomposition import NMF
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  import random
11
- from transformers import GPT2Tokenizer
12
 
13
  class TweetDatasetProcessor:
14
  def __init__(self):
@@ -16,7 +15,6 @@ class TweetDatasetProcessor:
16
  self.groq_client = groq.Groq(api_key=os.getenv('Groq_api'))
17
  self.tweets = []
18
  self.personality_profile = {}
19
- self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2') # Initialize tokenizer
20
 
21
  def extract_text_from_pdf(self, pdf_path):
22
  """Extract text content from PDF file."""
@@ -74,46 +72,24 @@ class TweetDatasetProcessor:
74
  """Extract hashtags from tweet."""
75
  return [word for word in text.split() if word.startswith('#')]
76
 
77
- def truncate_to_token_limit(self, tweets, max_tokens=6000):
78
- """Truncate tweets to fit within token limit."""
79
- total_tokens = 0
80
- truncated_tweets = []
81
- for tweet in tweets:
82
- tokens = self.tokenizer.encode(tweet)
83
- if total_tokens + len(tokens) > max_tokens:
84
- break
85
- total_tokens += len(tokens)
86
- truncated_tweets.append(tweet)
87
- return truncated_tweets
88
-
89
  def analyze_personality(self):
90
  """Comprehensive personality analysis."""
91
  all_tweets = [tweet['content'] for tweet in self.tweets]
92
-
93
- # Truncate tweets to avoid exceeding token limit
94
- truncated_tweets = self.truncate_to_token_limit(all_tweets, max_tokens=6000)
95
-
96
- # Create analysis prompt with truncated tweets
97
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
98
  Core beliefs, emotional tendencies, cognitive patterns, etc.
99
  Tweets for analysis:
100
- {json.dumps(truncated_tweets, indent=2)}
101
  """
102
 
103
- try:
104
- response = self.groq_client.chat.completions.create(
105
- messages=[
106
- {"role": "system", "content": "You are an expert psychologist."},
107
- {"role": "user", "content": analysis_prompt},
108
- ],
109
- model="llama-3.1-70b-versatile",
110
- temperature=0.1,
111
- )
112
- self.personality_profile = response.choices[0].message.content
113
- except Exception as e:
114
- print(f"Error processing personality analysis: {e}")
115
- self.personality_profile = {}
116
-
117
  return self.personality_profile
118
 
119
  def analyze_topics(self, n_topics=5):
@@ -156,12 +132,9 @@ class TweetDatasetProcessor:
156
  {context}
157
  Additionally, consider these contexts to increase diversity:
158
  {', '.join(selected_contexts)}
159
- Generate a tweet that the author would write, ensuring that the tweet:
160
- 1. Reflects the author's personality traits, core beliefs, and values.
161
- 2. Incorporates insights from multiple topics when possible.
162
- 3. Uses a natural communication style and vocabulary.
163
- 4. Includes relevant mentions or hashtags if applicable.
164
- The tweet should feel diverse and authentic, touching on a variety of topics."""
165
 
166
  try:
167
  response = self.groq_client.chat.completions.create(
@@ -173,7 +146,9 @@ class TweetDatasetProcessor:
173
  temperature=1.0, # Increased temperature for more diversity
174
  max_tokens=150,
175
  )
176
- return response.choices[0].message.content
 
 
177
  except Exception as e:
178
  print(f"Error generating tweet: {e}")
179
  return "Error generating tweet"
 
8
  from sklearn.decomposition import NMF
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  import random
 
11
 
12
  class TweetDatasetProcessor:
13
  def __init__(self):
 
15
  self.groq_client = groq.Groq(api_key=os.getenv('Groq_api'))
16
  self.tweets = []
17
  self.personality_profile = {}
 
18
 
19
  def extract_text_from_pdf(self, pdf_path):
20
  """Extract text content from PDF file."""
 
72
  """Extract hashtags from tweet."""
73
  return [word for word in text.split() if word.startswith('#')]
74
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def analyze_personality(self):
76
  """Comprehensive personality analysis."""
77
  all_tweets = [tweet['content'] for tweet in self.tweets]
 
 
 
 
 
78
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
79
  Core beliefs, emotional tendencies, cognitive patterns, etc.
80
  Tweets for analysis:
81
+ {json.dumps(all_tweets[:30], indent=2)}
82
  """
83
 
84
+ response = self.groq_client.chat.completions.create(
85
+ messages=[
86
+ {"role": "system", "content": "You are an expert psychologist."},
87
+ {"role": "user", "content": analysis_prompt},
88
+ ],
89
+ model="llama-3.1-70b-versatile",
90
+ temperature=0.1,
91
+ )
92
+ self.personality_profile = response.choices[0].message.content
 
 
 
 
 
93
  return self.personality_profile
94
 
95
  def analyze_topics(self, n_topics=5):
 
132
  {context}
133
  Additionally, consider these contexts to increase diversity:
134
  {', '.join(selected_contexts)}
135
+
136
+ **Only generate the tweet. Do not include analysis, explanation, or any other content.**
137
+ """
 
 
 
138
 
139
  try:
140
  response = self.groq_client.chat.completions.create(
 
146
  temperature=1.0, # Increased temperature for more diversity
147
  max_tokens=150,
148
  )
149
+ tweet = response.choices[0].message.content
150
+ # Ensure the response only contains the tweet text, and nothing else.
151
+ return tweet.strip().split("\n")[0] # Only return the first line (tweet)
152
  except Exception as e:
153
  print(f"Error generating tweet: {e}")
154
  return "Error generating tweet"