farah1 committed on
Commit
50ca63c
·
verified ·
1 Parent(s): 610a6d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -22
app.py CHANGED
@@ -8,40 +8,31 @@ Original file is located at
8
  """
9
 
10
  import os
11
- import json
12
  import pandas as pd
13
  from rank_bm25 import BM25Okapi
14
  import gradio as gr
15
  import openai
16
  from datasets import load_dataset
17
 
18
- # Ensure Hugging Face token exists
19
  HF_TOKEN = os.getenv("HF_TOKEN")
20
  if not HF_TOKEN:
21
  raise ValueError("Hugging Face token is not set. Please set HF_TOKEN as an environment variable.")
22
 
23
- # Ensure OpenAI API key exists
24
- openai.api_key = os.getenv("OPENAI_API_KEY")
25
- if not openai.api_key:
26
- raise ValueError("OpenAI API key is not set. Please set OPENAI_API_KEY as an environment variable.")
27
-
28
  # Explicitly define dataset file paths
29
  data_files = {
30
  "train": "hf://datasets/farah1/mental-health-posts-classification/train.csv",
31
  "validation": "hf://datasets/farah1/mental-health-posts-classification/validation.csv",
32
  }
33
 
34
- # Load dataset
35
  try:
36
  print("Loading dataset...")
37
  dataset = load_dataset("csv", data_files=data_files)
38
  train_data = dataset["train"].to_pandas()
39
- validation_data = dataset["validation"].to_pandas()
40
  print("Dataset loaded successfully.")
41
  except Exception as e:
42
  print(f"Failed to load dataset: {e}")
43
- train_data = pd.DataFrame() # Fallback to empty DataFrame
44
- validation_data = pd.DataFrame()
45
 
46
  # Check and create the 'text' column
47
  if "text" not in train_data.columns:
@@ -50,19 +41,17 @@ if "text" not in train_data.columns:
50
  else:
51
  raise ValueError("The 'text' column is missing, and the required 'title' and 'content' columns are not available to create it.")
52
 
53
- # Ensure the necessary columns exist in the training dataset
54
- required_columns = ["text", "Ground_Truth_Stress", "Ground_Truth_Anxiety", "Ground_Truth_Depression", "Ground_Truth_Other_binary"]
55
- for column in required_columns:
56
- if column not in train_data.columns:
57
- raise ValueError(f"Missing required column '{column}' in the training dataset.")
58
-
59
  # Initialize BM25
60
  tokenized_train = [doc.split() for doc in train_data["text"]]
61
  bm25 = BM25Okapi(tokenized_train)
62
 
63
-
64
  # Few-shot classification function
65
- def classify_text(input_text, k=20):
 
 
 
 
 
66
  # Tokenize input text
67
  tokenized_text = input_text.split()
68
  # Get top-k similar examples using BM25
@@ -106,17 +95,21 @@ def classify_text(input_text, k=20):
106
  return content # Return the label directly
107
  except Exception as e:
108
  print(f"Error occurred: {e}")
109
- return "Error in classification."
110
 
111
  # Gradio Interface
112
  interface = gr.Interface(
113
  fn=classify_text,
114
- inputs=gr.Textbox(lines=5, placeholder="Enter your thoughts or feelings..."),
 
 
 
115
  outputs="text",
116
  title="Mental Health Classifier",
117
- description="Enter text, and the system will classify it into one of the following categories: Stress, Anxiety, Depression, or Other.",
118
  )
119
 
120
  if __name__ == "__main__":
121
  interface.launch()
122
 
 
 
8
  """
9
 
10
  import os
 
11
  import pandas as pd
12
  from rank_bm25 import BM25Okapi
13
  import gradio as gr
14
  import openai
15
  from datasets import load_dataset
16
 
17
+ # Require the Hugging Face token (HF_TOKEN) before loading the dataset
18
  HF_TOKEN = os.getenv("HF_TOKEN")
19
  if not HF_TOKEN:
20
  raise ValueError("Hugging Face token is not set. Please set HF_TOKEN as an environment variable.")
21
 
 
 
 
 
 
22
  # Explicitly define dataset file paths
23
  data_files = {
24
  "train": "hf://datasets/farah1/mental-health-posts-classification/train.csv",
25
  "validation": "hf://datasets/farah1/mental-health-posts-classification/validation.csv",
26
  }
27
 
 
28
  try:
29
  print("Loading dataset...")
30
  dataset = load_dataset("csv", data_files=data_files)
31
  train_data = dataset["train"].to_pandas()
 
32
  print("Dataset loaded successfully.")
33
  except Exception as e:
34
  print(f"Failed to load dataset: {e}")
35
+ train_data = pd.DataFrame()
 
36
 
37
  # Check and create the 'text' column
38
  if "text" not in train_data.columns:
 
41
  else:
42
  raise ValueError("The 'text' column is missing, and the required 'title' and 'content' columns are not available to create it.")
43
 
 
 
 
 
 
 
44
  # Initialize BM25
45
  tokenized_train = [doc.split() for doc in train_data["text"]]
46
  bm25 = BM25Okapi(tokenized_train)
47
 
 
48
  # Few-shot classification function
49
+ def classify_text(api_key, input_text, k=20):
50
+ # Set the API key
51
+ openai.api_key = api_key
52
+ if not openai.api_key:
53
+ return "Error: OpenAI API key is not set."
54
+
55
  # Tokenize input text
56
  tokenized_text = input_text.split()
57
  # Get top-k similar examples using BM25
 
95
  return content # Return the label directly
96
  except Exception as e:
97
  print(f"Error occurred: {e}")
98
+ return f"Error: {e}"
99
 
100
  # Gradio Interface
101
  interface = gr.Interface(
102
  fn=classify_text,
103
+ inputs=[
104
+ gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here..."),
105
+ gr.Textbox(lines=5, label="Input Text", placeholder="Enter your thoughts or feelings..."),
106
+ ],
107
  outputs="text",
108
  title="Mental Health Classifier",
109
+ description="Enter your OpenAI API key and input text. The system will classify the text into one of the following categories: Stress, Anxiety, Depression, or Other.",
110
  )
111
 
112
  if __name__ == "__main__":
113
  interface.launch()
114
 
115
+