|
import heapq

import openai
from rank_bm25 import BM25Okapi
|
|
|
class MentalHealthClassifier:
    """Few-shot text classifier: BM25 retrieves labelled examples, GPT-4 answers.

    The training texts are indexed once with BM25. For every query the ``k``
    highest-scoring training rows are rendered as worked examples and sent,
    together with the query, to the OpenAI chat completion endpoint.
    """

    def __init__(self, train_data):
        """Build the BM25 index over the training texts.

        Args:
            train_data: Labelled examples. The object is read through
                ``train_data["text"]`` and ``train_data.iloc[i][column]``, so
                a pandas DataFrame is presumably expected (verify against the
                caller). Columns read by this class: ``text``,
                ``Ground_Truth_Stress``, ``Ground_Truth_Anxiety``,
                ``Ground_Truth_Depression`` and ``Ground_Truth_Other_binary``.
        """
        # Plain whitespace tokenisation; queries are tokenised the same way.
        self.tokenized_train = [doc.split() for doc in train_data["text"]]
        self.bm25 = BM25Okapi(self.tokenized_train)
        self.train_data = train_data

    def classify_text(self, api_key: str, input_text: str, k: int = 20) -> str:
        """Classify ``input_text`` using retrieved examples and GPT-4.

        Args:
            api_key: OpenAI API key. It is assigned to the module-level
                ``openai.api_key`` once it has passed validation.
            input_text: Text to classify.
            k: Maximum number of training examples to include in the prompt.
                Values below zero are treated as zero.

        Returns:
            The model's reply with surrounding whitespace stripped, or a
            string starting with ``"Error: "`` when the key is missing, the
            input is blank, or the API call raises.
        """
        # Validate before touching global state or spending an API call.
        if not api_key:
            return "Error: OpenAI API key is not set."
        if not input_text or not input_text.strip():
            # A blank query scores every document 0, so the "nearest"
            # examples would be arbitrary and the request meaningless.
            return "Error: Input text is empty."

        openai.api_key = api_key

        examples = self._format_examples(self._top_k_indices(input_text, k))
        prompt = self._build_prompt(input_text, examples)

        # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 client
        # interface; confirm the pinned ``openai`` version still provides it.
        try:
            response = openai.ChatCompletion.create(
                messages=[
                    {"role": "system", "content": "You are a mental health specialist."},
                    {"role": "user", "content": prompt},
                ],
                model="gpt-4",
                temperature=0,
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            # Boundary handler: callers receive failures as an error string.
            return f"Error: {e}"

    def _top_k_indices(self, input_text, k):
        """Return positions of the ``k`` best BM25 matches, best first."""
        scores = self.bm25.get_scores(input_text.split())
        # nlargest is equivalent to sorted(..., reverse=True)[:k], including
        # tie order, without sorting the whole corpus. Clamping k avoids the
        # "drop the last |k| items" meaning a negative slice bound would have.
        return heapq.nlargest(max(k, 0), range(len(scores)), key=scores.__getitem__)

    def _format_examples(self, indices):
        """Render the training rows at ``indices`` as numbered prompt examples."""
        rendered = []
        for number, idx in enumerate(indices, start=1):
            row = self.train_data.iloc[idx]  # fetch the row once, not per field
            rendered.append(
                f"Example {number}:\nText: {row['text']}\nClassification: "
                f"Stress={row['Ground_Truth_Stress']}, "
                f"Anxiety={row['Ground_Truth_Anxiety']}, "
                f"Depression={row['Ground_Truth_Depression']}, "
                f"Other={row['Ground_Truth_Other_binary']}\n"
            )
        return "\n".join(rendered)

    @staticmethod
    def _build_prompt(input_text, examples):
        """Assemble the user message: instructions, the query, then examples."""
        # NOTE(review): the instructions say "one or more" categories and then
        # ask for a single category, while the examples carry one value per
        # label. Wording is kept as-is; confirm the intended output format.
        return (
            "\n"
            "You are a mental health specialist. Analyze the provided text and "
            "classify it into one or more of the following categories: "
            "Stress, Anxiety, Depression, or Other.\n"
            "\n"
            "Respond with a single category that best matches the content: "
            "Stress, Anxiety, Depression, or Other.\n"
            "\n"
            "Here is the text to classify:\n"
            f'"{input_text}"\n'
            "\n"
            "### Examples:\n"
            f"{examples}\n"
        )
|
|