seemapatil committed on
Commit
3db2689
·
1 Parent(s): bf65949

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -1,5 +1,5 @@
1
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments
2
- from datasets import load_dataset
3
  import csv
4
 
5
  # Read requirements.txt file
@@ -19,8 +19,11 @@ with open('IMDB Dataset.csv', 'r') as csv_file:
19
  }
20
  preprocessed_data.append(preprocessed_entry)
21
 
22
- # Convert the preprocessed data to a dataset
23
- dataset = load_dataset('csv', data=preprocessed_data, delimiter=',')
 
 
 
24
 
25
  # Tokenize the dataset
26
  tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
@@ -38,3 +41,4 @@ import numpy as np
38
  import evaluate
39
 
40
  metric = evaluate.load("accuracy")
 
 
1
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments
2
+ from datasets import pandas, Dataset
3
  import csv
4
 
5
  # Read requirements.txt file
 
19
  }
20
  preprocessed_data.append(preprocessed_entry)
21
 
22
+ # Convert the preprocessed data to a pandas DataFrame
23
+ df = pandas.DataFrame(preprocessed_data)
24
+
25
+ # Convert the DataFrame to a datasets dataset
26
+ dataset = Dataset.from_pandas(df)
27
 
28
  # Tokenize the dataset
29
  tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
 
41
  import evaluate
42
 
43
  metric = evaluate.load("accuracy")
44
+