Spaces:

farah1
/

mental-health-bm25

Sleeping

farah1 committed on Dec 9, 2024

Commit

b369092

verified ·

1 Parent(s): 92c2e11

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -33,23 +33,27 @@ try:
     train_data = dataset["train"].to_pandas()
     validation_data = dataset["validation"].to_pandas()
     print("Dataset loaded successfully.")
 except Exception as e:
     print(f"Failed to load dataset: {e}")
     train_data = pd.DataFrame()  # Fallback to empty DataFrame
     validation_data = pd.DataFrame()
 # Ensure the necessary columns exist in the training dataset
-required_columns = ["content", "Ground_Truth_Stress", "Ground_Truth_Anxiety", "Ground_Truth_Depression", "Ground_Truth_Other_binary"]
-if not train_data.empty:
-    for column in required_columns:
-        if column not in train_data.columns:
-            raise ValueError(f"Missing required column '{column}' in the training dataset.")
-else:
-    print("Training dataset is empty. Exiting.")
-    exit(1)
 # Initialize BM25
-tokenized_train = [doc.split() for doc in train_data["content"]]
 bm25 = BM25Okapi(tokenized_train)
 # Set OpenAI API key
@@ -114,3 +118,4 @@ interface = gr.Interface(
 if __name__ == "__main__":
     interface.launch()

     train_data = dataset["train"].to_pandas()
     validation_data = dataset["validation"].to_pandas()
     print("Dataset loaded successfully.")
+    print("Train dataset columns:", train_data.columns)
 except Exception as e:
     print(f"Failed to load dataset: {e}")
     train_data = pd.DataFrame()  # Fallback to empty DataFrame
     validation_data = pd.DataFrame()
+# Check and create the 'text' column
+if "text" not in train_data.columns:
+    if "title" in train_data.columns and "content" in train_data.columns:
+        train_data["text"] = train_data["title"] + " " + train_data["content"]
+    else:
+        raise ValueError("The 'text' column is missing, and the required 'title' and 'content' columns are not available to create it.")
 # Ensure the necessary columns exist in the training dataset
+required_columns = ["text", "Ground_Truth_Stress", "Ground_Truth_Anxiety", "Ground_Truth_Depression", "Ground_Truth_Other_binary"]
+for column in required_columns:
+    if column not in train_data.columns:
+        raise ValueError(f"Missing required column '{column}' in the training dataset.")
 # Initialize BM25
+tokenized_train = [doc.split() for doc in train_data["text"]]
 bm25 = BM25Okapi(tokenized_train)
 # Set OpenAI API key
 if __name__ == "__main__":
     interface.launch()