Update app.py
app.py CHANGED
@@ -15,10 +15,15 @@ import gradio as gr
 import openai
 from datasets import load_dataset
 
-# Ensure Hugging Face
-
-
-
+# Ensure Hugging Face token exists
+HF_TOKEN = os.getenv("HF_TOKEN")
+if not HF_TOKEN:
+    raise ValueError("Hugging Face token is not set. Please set HF_TOKEN as an environment variable.")
+
+# Ensure OpenAI API key exists
+openai.api_key = os.getenv("OPENAI_API_KEY")
+if not openai.api_key:
+    raise ValueError("OpenAI API key is not set. Please set OPENAI_API_KEY as an environment variable.")
 
 # Explicitly define dataset file paths
 data_files = {
@@ -33,7 +38,6 @@ try:
     train_data = dataset["train"].to_pandas()
     validation_data = dataset["validation"].to_pandas()
     print("Dataset loaded successfully.")
-    print("Train dataset columns:", train_data.columns)
 except Exception as e:
     print(f"Failed to load dataset: {e}")
     train_data = pd.DataFrame()  # Fallback to empty DataFrame
@@ -56,11 +60,6 @@ for column in required_columns:
 tokenized_train = [doc.split() for doc in train_data["text"]]
 bm25 = BM25Okapi(tokenized_train)
 
-# Set OpenAI API key
-openai.api_key = os.getenv("OPENAI_API_KEY")
-if not openai.api_key:
-    raise ValueError("OpenAI API key is not set. Please set it as an environment variable.")
-
 # Few-shot classification function
 def classify_text(input_text, k=20):
     # Tokenize input text
@@ -102,9 +101,16 @@ def classify_text(input_text, k=20):
             model="gpt-4",
             temperature=0,
         )
+        print("OpenAI Response:", response)
+        if "choices" not in response or not response["choices"]:
+            raise ValueError("Invalid or empty response from OpenAI.")
         results = response.choices[0].message.content
         return json.loads(results)
+    except json.JSONDecodeError:
+        print("Failed to decode JSON from OpenAI response.")
+        return {"error": "Failed to decode JSON from OpenAI response."}
     except Exception as e:
+        print(f"Error occurred: {e}")
        return {"error": str(e)}
 
 # Gradio Interface
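The diff validates HF_TOKEN at startup, but the hunks shown never reach the point where the token is consumed. A minimal sketch of passing it to load_dataset, assuming the dataset lives in a private Hub repo; the repo id and file names below are placeholders, since the real data_files mapping is truncated in the diff:

```python
import os
from datasets import load_dataset

HF_TOKEN = os.getenv("HF_TOKEN")

# Placeholder repo id and file names; the real values are in the
# data_files block that this diff cuts off.
data_files = {
    "train": "train.jsonl",
    "validation": "validation.jsonl",
}
dataset = load_dataset(
    "your-org/your-dataset",  # placeholder repo id
    data_files=data_files,
    token=HF_TOKEN,  # older datasets releases use use_auth_token= instead
)
```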
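Most of classify_text's body falls between the hunks, so the retrieval step that turns the bm25 index into few-shot examples is not visible here. A sketch of the usual pattern under that gap; the helper name retrieve_examples and the use of get_scores are assumptions, not code from this commit:

```python
def retrieve_examples(input_text, train_data, bm25, k=20):
    # Score every training document against the tokenized input.
    scores = bm25.get_scores(input_text.split())
    # Indices of the k highest-scoring documents.
    top_idx = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:k]
    # Return the matching rows to be formatted into the few-shot prompt.
    return train_data.iloc[top_idx]
```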
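The new response checks mix dict-style access ("choices" not in response) with attribute access (response.choices). That combination is valid on the legacy openai<1.0 SDK, where response objects subclass dict, and the module-level openai.api_key assignment points the same way. A self-contained sketch of the call under that assumption; the messages payload is hypothetical:

```python
import json
import openai

def call_classifier(prompt):
    # Chat completion via the legacy (openai<1.0) SDK, matching the
    # module-level openai.api_key style used in app.py.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        temperature=0,
        messages=[{"role": "user", "content": prompt}],  # hypothetical payload
    )
    # Legacy responses subclass dict, so the membership test and the
    # attribute access below both work on the same object.
    if "choices" not in response or not response["choices"]:
        raise ValueError("Invalid or empty response from OpenAI.")
    return json.loads(response.choices[0].message.content)
```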
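The "# Gradio Interface" section itself sits outside the diff. A minimal wiring sketch for completeness; the stub classifier and component labels are placeholders standing in for app.py's real classify_text:

```python
import gradio as gr

def classify_text(input_text, k=20):
    # Stub standing in for app.py's real BM25 + GPT-4 classifier.
    return {"text": input_text, "k": k}

demo = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(label="Text to classify"),
    outputs=gr.JSON(label="Predicted labels"),
)

if __name__ == "__main__":
    demo.launch()
```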