Spaces:

Ankitajadhav
/

Whats_Cooking

Runtime error

Ankitajadhav committed on Jul 7, 2024

Commit

91b2664

verified ·

1 Parent(s): 1ab12bd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -46,11 +46,11 @@ class VectorStore:
         self.collection = self.chroma_client.create_collection(name=collection_name)
     # Method to populate the vector store with embeddings from a dataset
-    def populate_vectors(self, dataset, batch_size=100):
         # Use dataset streaming
         #dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train[:1500]', streaming=True)
         dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train')
-        dataset = dataset.select(range(500))  # Select the first 1500 examples
         texts = []
         i = 0  # Initialize index
@@ -92,7 +92,7 @@ vector_store.populate_vectors(dataset=None)
 def fine_tune_model():
     # Load your dataset
     dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train')
-    dataset = dataset.select(range(500))  # Select the first 1500 examples
     # Prepare the data for training
     def tokenize_function(examples):

         self.collection = self.chroma_client.create_collection(name=collection_name)
     # Method to populate the vector store with embeddings from a dataset
+    def populate_vectors(self, dataset, batch_size=10):
         # Use dataset streaming
         #dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train[:1500]', streaming=True)
         dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train')
+        dataset = dataset.select(range(50))  # Select the first 1500 examples
         texts = []
         i = 0  # Initialize index
 def fine_tune_model():
     # Load your dataset
     dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train')
+    dataset = dataset.select(range(50))  # Select the first 1500 examples
     # Prepare the data for training
     def tokenize_function(examples):