Update app.py
app.py
CHANGED
@@ -8,40 +8,31 @@ Original file is located at
 """

 import os
-import json
 import pandas as pd
 from rank_bm25 import BM25Okapi
 import gradio as gr
 import openai
 from datasets import load_dataset

-#
+# Load Hugging Face dataset
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise ValueError("Hugging Face token is not set. Please set HF_TOKEN as an environment variable.")

-# Ensure OpenAI API key exists
-openai.api_key = os.getenv("OPENAI_API_KEY")
-if not openai.api_key:
-    raise ValueError("OpenAI API key is not set. Please set OPENAI_API_KEY as an environment variable.")
-
 # Explicitly define dataset file paths
 data_files = {
     "train": "hf://datasets/farah1/mental-health-posts-classification/train.csv",
     "validation": "hf://datasets/farah1/mental-health-posts-classification/validation.csv",
 }

-# Load dataset
 try:
     print("Loading dataset...")
     dataset = load_dataset("csv", data_files=data_files)
     train_data = dataset["train"].to_pandas()
-    validation_data = dataset["validation"].to_pandas()
     print("Dataset loaded successfully.")
 except Exception as e:
     print(f"Failed to load dataset: {e}")
-    train_data = pd.DataFrame()
-    validation_data = pd.DataFrame()
+    train_data = pd.DataFrame()

 # Check and create the 'text' column
 if "text" not in train_data.columns:
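The unchanged lines skipped between this hunk and the next build the 'text' column when it is missing; that block is not shown in the diff. Judging from the error message in the else branch of the next hunk, it presumably concatenates the 'title' and 'content' columns, roughly along these lines (an assumption, not code from this commit):

# Presumed shape of the elided, unchanged block: 'text' built from 'title' and 'content'.
if "text" not in train_data.columns:
    if "title" in train_data.columns and "content" in train_data.columns:
        train_data["text"] = train_data["title"].fillna("") + " " + train_data["content"].fillna("")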
@@ -50,19 +41,17 @@ if "text" not in train_data.columns:
 else:
     raise ValueError("The 'text' column is missing, and the required 'title' and 'content' columns are not available to create it.")

-# Ensure the necessary columns exist in the training dataset
-required_columns = ["text", "Ground_Truth_Stress", "Ground_Truth_Anxiety", "Ground_Truth_Depression", "Ground_Truth_Other_binary"]
-for column in required_columns:
-    if column not in train_data.columns:
-        raise ValueError(f"Missing required column '{column}' in the training dataset.")
-
 # Initialize BM25
 tokenized_train = [doc.split() for doc in train_data["text"]]
 bm25 = BM25Okapi(tokenized_train)

-
 # Few-shot classification function
-def classify_text(input_text, k=20):
+def classify_text(api_key, input_text, k=20):
+    # Set the API key
+    openai.api_key = api_key
+    if not openai.api_key:
+        return "Error: OpenAI API key is not set."
+
     # Tokenize input text
     tokenized_text = input_text.split()
     # Get top-k similar examples using BM25
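The "top-k similar examples" step referenced by the comments above is unchanged by this commit and not shown. With rank_bm25, that lookup is normally done through get_scores or get_top_n on the fitted BM25Okapi index; a small illustrative sketch using the bm25 index and train_data defined above (the query text and variable names are placeholders, not taken verbatim from app.py):

# Illustrative BM25 lookup sketch.
tokenized_text = "i feel tense and cannot focus".split()
scores = bm25.get_scores(tokenized_text)             # one relevance score per training post
top_idx = scores.argsort()[::-1][:20]                # indices of the 20 best matches
top_examples = train_data.iloc[top_idx]              # rows used as few-shot demonstrations
# Alternatively, rank_bm25 can return the documents directly:
top_docs = bm25.get_top_n(tokenized_text, train_data["text"].tolist(), n=20)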
@@ -106,17 +95,21 @@ def classify_text(input_text, k=20):
         return content  # Return the label directly
     except Exception as e:
         print(f"Error occurred: {e}")
-        return "Error
+        return f"Error: {e}"

 # Gradio Interface
 interface = gr.Interface(
     fn=classify_text,
-    inputs=
+    inputs=[
+        gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here..."),
+        gr.Textbox(lines=5, label="Input Text", placeholder="Enter your thoughts or feelings..."),
+    ],
     outputs="text",
     title="Mental Health Classifier",
-    description="Enter
+    description="Enter your OpenAI API key and input text. The system will classify the text into one of the following categories: Stress, Anxiety, Depression, or Other.",
 )

 if __name__ == "__main__":
     interface.launch()

+
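The prompt-building and OpenAI call sit between the BM25 lookup and the "return content" line above; they are untouched by this commit and therefore not shown. Since the function assigns openai.api_key directly, the app appears to target the pre-1.0 openai client. The sketch below shows one plausible shape for that step; the label-mapping helper, prompt wording, and model name are illustrative assumptions, not code from this repository.

# Illustrative few-shot prompting sketch (assumes the legacy openai<1.0 API).
import openai

def few_shot_label(examples, input_text, model="gpt-3.5-turbo"):
    # examples: DataFrame rows retrieved by BM25, with 'text' and Ground_Truth_* columns.
    def label_of(row):
        # Assumed mapping from the binary ground-truth columns to a label string.
        if row.get("Ground_Truth_Stress") == 1:
            return "Stress"
        if row.get("Ground_Truth_Anxiety") == 1:
            return "Anxiety"
        if row.get("Ground_Truth_Depression") == 1:
            return "Depression"
        return "Other"

    shots = "\n\n".join(
        f"Post: {row['text']}\nLabel: {label_of(row)}" for _, row in examples.iterrows()
    )
    prompt = (
        "Classify the post as Stress, Anxiety, Depression, or Other.\n\n"
        f"{shots}\n\nPost: {input_text}\nLabel:"
    )
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    content = response["choices"][0]["message"]["content"].strip()
    return content  # mirrors the 'return content' line in the hunk above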
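Because the OpenAI key now arrives through the first textbox instead of the OPENAI_API_KEY environment variable, the classifier can also be exercised without launching the UI; the key and input text below are placeholders:

# Hypothetical direct call to the updated function (k keeps its default of 20).
label = classify_text("sk-...", "Lately I can't sleep and I keep replaying every mistake I made at work.")
print(label)  # expected to be one of: Stress, Anxiety, Depression, Other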