Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -139,38 +139,19 @@ topic_extractor = TopicExtractor()
|
|
139 |
chatbot = Chatbot()
|
140 |
|
141 |
# Load the yt-commons dataset
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
if youtube_data:
|
156 |
-
print("Dataset loaded successfully!")
|
157 |
-
print(f"Number of splits: {len(youtube_data)}")
|
158 |
-
print(f"Available splits: {list(youtube_data.keys())}")
|
159 |
-
|
160 |
-
# Inspect the first few rows of the dataset
|
161 |
-
print("\nSample data from the 'train' split:")
|
162 |
-
print(youtube_data["train"][0]) # Adjust based on the dataset structure
|
163 |
-
else:
|
164 |
-
print("Failed to load dataset.")
|
165 |
-
|
166 |
-
|
167 |
-
# Preprocess and build search index
|
168 |
-
youtube_data = load_youtube_data()
|
169 |
-
if youtube_data:
|
170 |
-
print("Dataset loaded successfully!")
|
171 |
-
print(f"Number of samples: {len(youtube_data)}")
|
172 |
-
else:
|
173 |
-
print("Failed to load dataset.")
|
174 |
|
175 |
# API Endpoints
|
176 |
@app.route("/classify", methods=["POST"])
|
|
|
139 |
chatbot = Chatbot()
|
140 |
|
141 |
# Load the yt-commons dataset
|
142 |
+
from datasets import load_dataset
|
143 |
+
|
144 |
+
# Load specific .parquet files
|
145 |
+
dataset = load_dataset("PleIAs/YouTube-Commons", data_files=["cctube_0.parquet", "cctube_1.parquet"], streaming=True)
|
146 |
+
|
147 |
+
# Print the title and description of the first streamed example
|
148 |
+
for example in dataset["train"]:
|
149 |
+
title = example["title"] # Replace 'title' with the correct column name
|
150 |
+
description = example["description"] # NOTE(review): confirm the dataset actually has a 'description' column; a missing key raises KeyError here
|
151 |
+
print(f"Title: {title}")
|
152 |
+
print(f"Description: {description}")
|
153 |
+
break # Stop after the first example for demonstration
|
154 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
# API Endpoints
|
157 |
@app.route("/classify", methods=["POST"])
|