import heapq

import openai
from rank_bm25 import BM25Okapi


class MentalHealthClassifier:
    """Few-shot text classifier backed by BM25 retrieval and an OpenAI chat model.

    The training data is indexed with BM25 at construction time. For every
    classification request the ``k`` best-matching training rows are rendered
    as labelled examples and sent, together with the input text, to the chat
    completion endpoint.

    ``train_data`` is accessed as ``train_data["text"]`` and
    ``train_data.iloc[i][column]``, so it is presumably a pandas DataFrame
    (TODO confirm with callers). Besides ``text`` it must provide the columns
    listed in ``_LABEL_COLUMNS``.
    """

    # (display label, training-data column) pairs rendered in each example.
    _LABEL_COLUMNS = (
        ("Stress", "Ground_Truth_Stress"),
        ("Anxiety", "Ground_Truth_Anxiety"),
        ("Depression", "Ground_Truth_Depression"),
        ("Other", "Ground_Truth_Other_binary"),
    )

    _SYSTEM_MESSAGE = "You are a mental health specialist."

    def __init__(self, train_data):
        """Index ``train_data["text"]`` for BM25 retrieval.

        Args:
            train_data: Table of labelled training rows (see class docstring).
        """
        # Whitespace tokenisation; the query is tokenised the same way in
        # classify_text so that both sides of the BM25 match agree.
        self.tokenized_train = [doc.split() for doc in train_data["text"]]
        self.bm25 = BM25Okapi(self.tokenized_train)
        self.train_data = train_data

    def classify_text(self, api_key, input_text, k=20, model="gpt-4"):
        """Classify ``input_text`` using the ``k`` most similar training rows.

        Args:
            api_key: OpenAI API key. Also stored in ``openai.api_key``.
            input_text: Text to classify.
            k: Number of retrieved training examples to include in the prompt.
                Values larger than the training set use every row; values
                less than or equal to zero include no examples.
            model: Name of the chat model to query.

        Returns:
            The model's reply with surrounding whitespace stripped, or a
            string starting with ``"Error:"`` when the key or text is missing
            or the API request fails. Failures are reported through the
            return value; API errors are not raised.
        """
        # Validate before touching global state or spending an API call.
        if not api_key:
            return "Error: OpenAI API key is not set."
        if not isinstance(input_text, str) or not input_text.strip():
            return "Error: Input text is empty."

        # Kept for backward compatibility: the original implementation set the
        # module-level key, and other code in the process may rely on it.
        openai.api_key = api_key

        examples = self._build_examples(input_text, k)
        prompt = self._build_prompt(input_text, examples)

        # Boundary with the remote service: any failure is converted into the
        # error-string convention this method already exposes to callers.
        try:
            return self._request_completion(api_key, prompt, model)
        except Exception as e:
            return f"Error: {e}"

    def _top_k_indices(self, input_text, k):
        """Return indices of the ``k`` highest-scoring training rows, best first."""
        scores = self.bm25.get_scores(input_text.split())
        # nlargest is equivalent to sorted(..., reverse=True)[:k] for k >= 0
        # but avoids fully sorting the score list.
        return heapq.nlargest(k, range(len(scores)), key=scores.__getitem__)

    def _format_example(self, number, row):
        """Render one training row as a numbered, labelled example."""
        labels = ", ".join(
            f"{label}={row[column]}" for label, column in self._LABEL_COLUMNS
        )
        return f"Example {number}:\nText: {row['text']}\nClassification: {labels}\n"

    def _build_examples(self, input_text, k):
        """Render the ``k`` nearest training rows as one examples string."""
        return "\n".join(
            # Fetch each row once; it is read several times when formatting.
            self._format_example(number, self.train_data.iloc[idx])
            for number, idx in enumerate(self._top_k_indices(input_text, k), start=1)
        )

    @staticmethod
    def _build_prompt(input_text, examples):
        """Assemble the user prompt from the input text and rendered examples."""
        # NOTE(review): the wording asks for "one or more" categories and then
        # for "a single category"; left unchanged pending a decision on the
        # intended output format.
        return (
            "You are a mental health specialist. Analyze the provided text and "
            "classify it into one or more of the following categories: "
            "Stress, Anxiety, Depression, or Other.\n\n"
            "Respond with a single category that best matches the content: "
            "Stress, Anxiety, Depression, or Other.\n\n"
            "Here is the text to classify:\n"
            f'"{input_text}"\n\n'
            "### Examples:\n"
            f"{examples}\n"
        )

    def _request_completion(self, api_key, prompt, model):
        """Send the prompt to the chat completion endpoint and return its text."""
        messages = [
            {"role": "system", "content": self._SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ]
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: ChatCompletion was removed in favour of client objects.
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0,
            )
        else:
            # Legacy openai<1.0 interface, as used by the original code.
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                temperature=0,
            )
        content = response.choices[0].message.content
        # The content can be null; treat that as an empty reply.
        return (content or "").strip()