Fred808 committed on
Commit
6d0fc85
·
verified ·
1 Parent(s): 291a511

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -32
app.py CHANGED
@@ -139,38 +139,19 @@ topic_extractor = TopicExtractor()
139
  chatbot = Chatbot()
140
 
141
  # Load the yt-commons dataset
142
- def load_youtube_data():
143
- try:
144
- # Load the dataset from Hugging Face Hub
145
- dataset = load_dataset("PleIAs/YouTube-Commons", data_files="*.parquet") # Load all .parquet files
146
- return dataset
147
- except Exception as e:
148
- print(f"Error loading yt-commons dataset: {e}")
149
- return None
150
-
151
-
152
- # Load the dataset
153
- youtube_data = load_youtube_data()
154
-
155
- if youtube_data:
156
- print("Dataset loaded successfully!")
157
- print(f"Number of splits: {len(youtube_data)}")
158
- print(f"Available splits: {list(youtube_data.keys())}")
159
-
160
- # Inspect the first few rows of the dataset
161
- print("\nSample data from the 'train' split:")
162
- print(youtube_data["train"][0]) # Adjust based on the dataset structure
163
- else:
164
- print("Failed to load dataset.")
165
-
166
-
167
- # Preprocess and build search index
168
- youtube_data = load_youtube_data()
169
- if youtube_data:
170
- print("Dataset loaded successfully!")
171
- print(f"Number of samples: {len(youtube_data)}")
172
- else:
173
- print("Failed to load dataset.")
174
 
175
  # API Endpoints
176
  @app.route("/classify", methods=["POST"])
 
139
  chatbot = Chatbot()
140
 
141
  # Load the yt-commons dataset
142
+ from datasets import load_dataset
143
+
144
+ # Load specific .parquet files
145
+ dataset = load_dataset("PleIAs/YouTube-Commons", data_files=["cctube_0.parquet", "cctube_1.parquet"], streaming=True)
146
+
147
+ # Extract specific columns
148
+ for example in dataset["train"]:
149
+ title = example["title"] # Replace 'title' with the correct column name
150
+ description = example["description"] # Replace 'description' with the correct column name
151
+ print(f"Title: {title}")
152
+ print(f"Description: {description}")
153
+ break # Stop after the first example for demonstration
154
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
  # API Endpoints
157
  @app.route("/classify", methods=["POST"])