Update app.py
app.py
CHANGED
@@ -8,40 +8,31 @@ Original file is located at
 """

 import os
-import json
 import pandas as pd
 from rank_bm25 import BM25Okapi
 import gradio as gr
 import openai
 from datasets import load_dataset

-#
+# Load Hugging Face dataset
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise ValueError("Hugging Face token is not set. Please set HF_TOKEN as an environment variable.")

-# Ensure OpenAI API key exists
-openai.api_key = os.getenv("OPENAI_API_KEY")
-if not openai.api_key:
-    raise ValueError("OpenAI API key is not set. Please set OPENAI_API_KEY as an environment variable.")
-
 # Explicitly define dataset file paths
 data_files = {
     "train": "hf://datasets/farah1/mental-health-posts-classification/train.csv",
     "validation": "hf://datasets/farah1/mental-health-posts-classification/validation.csv",
 }

-# Load dataset
 try:
     print("Loading dataset...")
     dataset = load_dataset("csv", data_files=data_files)
     train_data = dataset["train"].to_pandas()
-    validation_data = dataset["validation"].to_pandas()
     print("Dataset loaded successfully.")
 except Exception as e:
     print(f"Failed to load dataset: {e}")
-    train_data = pd.DataFrame()
-    validation_data = pd.DataFrame()
+    train_data = pd.DataFrame()

 # Check and create the 'text' column
 if "text" not in train_data.columns:
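The unchanged lines skipped between this hunk and the next build the 'text' column when it is missing; that block is not shown in the diff. Judging from the error message in the else branch of the next hunk, it presumably concatenates the 'title' and 'content' columns, roughly along these lines (an assumption, not code from this commit):

# Presumed shape of the elided, unchanged block: 'text' built from 'title' and 'content'.
if "text" not in train_data.columns:
    if "title" in train_data.columns and "content" in train_data.columns:
        train_data["text"] = train_data["title"].fillna("") + " " + train_data["content"].fillna("")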
@@ -50,19 +41,17 @@ if "text" not in train_data.columns:
 else:
     raise ValueError("The 'text' column is missing, and the required 'title' and 'content' columns are not available to create it.")

-# Ensure the necessary columns exist in the training dataset
-required_columns = ["text", "Ground_Truth_Stress", "Ground_Truth_Anxiety", "Ground_Truth_Depression", "Ground_Truth_Other_binary"]
-for column in required_columns:
-    if column not in train_data.columns:
-        raise ValueError(f"Missing required column '{column}' in the training dataset.")
-
 # Initialize BM25
 tokenized_train = [doc.split() for doc in train_data["text"]]
 bm25 = BM25Okapi(tokenized_train)

-
 # Few-shot classification function
-def classify_text(input_text, k=20):
+def classify_text(api_key, input_text, k=20):
+    # Set the API key
+    openai.api_key = api_key
+    if not openai.api_key:
+        return "Error: OpenAI API key is not set."
+
     # Tokenize input text
     tokenized_text = input_text.split()
     # Get top-k similar examples using BM25
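The "top-k similar examples" step referenced by the comments above is unchanged by this commit and not shown. With rank_bm25, that lookup is normally done through get_scores or get_top_n on the fitted BM25Okapi index; a small illustrative sketch using the bm25 index and train_data defined above (the query text and variable names are placeholders, not taken verbatim from app.py):

# Illustrative BM25 lookup sketch.
tokenized_text = "i feel tense and cannot focus".split()
scores = bm25.get_scores(tokenized_text)             # one relevance score per training post
top_idx = scores.argsort()[::-1][:20]                # indices of the 20 best matches
top_examples = train_data.iloc[top_idx]              # rows used as few-shot demonstrations
# Alternatively, rank_bm25 can return the documents directly:
top_docs = bm25.get_top_n(tokenized_text, train_data["text"].tolist(), n=20)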
@@ -106,17 +95,21 @@ def classify_text(input_text, k=20):
         return content  # Return the label directly
     except Exception as e:
         print(f"Error occurred: {e}")
-        return "Error
+        return f"Error: {e}"

 # Gradio Interface
 interface = gr.Interface(
     fn=classify_text,
-    inputs=
+    inputs=[
+        gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here..."),
+        gr.Textbox(lines=5, label="Input Text", placeholder="Enter your thoughts or feelings..."),
+    ],
     outputs="text",
     title="Mental Health Classifier",
-    description="Enter
+    description="Enter your OpenAI API key and input text. The system will classify the text into one of the following categories: Stress, Anxiety, Depression, or Other.",
 )

 if __name__ == "__main__":
     interface.launch()

+
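The prompt-building and OpenAI call sit between the BM25 lookup and the "return content" line above; they are untouched by this commit and therefore not shown. Since the function assigns openai.api_key directly, the app appears to target the pre-1.0 openai client. The sketch below shows one plausible shape for that step; the label-mapping helper, prompt wording, and model name are illustrative assumptions, not code from this repository.

# Illustrative few-shot prompting sketch (assumes the legacy openai<1.0 API).
import openai

def few_shot_label(examples, input_text, model="gpt-3.5-turbo"):
    # examples: DataFrame rows retrieved by BM25, with 'text' and Ground_Truth_* columns.
    def label_of(row):
        # Assumed mapping from the binary ground-truth columns to a label string.
        if row.get("Ground_Truth_Stress") == 1:
            return "Stress"
        if row.get("Ground_Truth_Anxiety") == 1:
            return "Anxiety"
        if row.get("Ground_Truth_Depression") == 1:
            return "Depression"
        return "Other"

    shots = "\n\n".join(
        f"Post: {row['text']}\nLabel: {label_of(row)}" for _, row in examples.iterrows()
    )
    prompt = (
        "Classify the post as Stress, Anxiety, Depression, or Other.\n\n"
        f"{shots}\n\nPost: {input_text}\nLabel:"
    )
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    content = response["choices"][0]["message"]["content"].strip()
    return content  # mirrors the 'return content' line in the hunk above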
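Because the OpenAI key now arrives through the first textbox instead of the OPENAI_API_KEY environment variable, the classifier can also be exercised without launching the UI; the key and input text below are placeholders:

# Hypothetical direct call to the updated function (k keeps its default of 20).
label = classify_text("sk-...", "Lately I can't sleep and I keep replaying every mistake I made at work.")
print(label)  # expected to be one of: Stress, Anxiety, Depression, Other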