smhavens committed on
Commit
01959cc
·
1 Parent(s): ec3e101

Please work

Browse files
Files changed (2) hide show
  1. app.py +0 -47
  2. train.py +1 -47
app.py CHANGED
@@ -83,29 +83,6 @@ def training():
83
  train_data = dataset["train"]
84
  # For agility we only use 1/2 of our available data
85
  n_examples = dataset["train"].num_rows // 2
86
- # n_remaining = dataset["train"].num_rows - n_examples
87
- # dataset_clean = {}
88
- # # dataset_0 = []
89
- # # dataset_1 = []
90
- # # dataset_2 = []
91
- # # dataset_3 = []
92
- # for i in range(n_examples):
93
- # dataset_clean[i] = {}
94
- # dataset_clean[i]["text"] = normalize(train_data[i]["text"], lowercase=True, remove_stopwords=True)
95
- # dataset_clean[i]["label"] = train_data[i]["label"]
96
- # if train_data[i]["label"] == 0:
97
- # dataset_0.append(dataset_clean[i])
98
- # elif train_data[i]["label"] == 1:
99
- # dataset_1.append(dataset_clean[i])
100
- # elif train_data[i]["label"] == 2:
101
- # dataset_2.append(dataset_clean[i])
102
- # elif train_data[i]["label"] == 3:
103
- # dataset_3.append(dataset_clean[i])
104
- # n_0 = len(dataset_0) // 2
105
- # n_1 = len(dataset_1) // 2
106
- # n_2 = len(dataset_2) // 2
107
- # n_3 = len(dataset_3) // 2
108
- # print("Label lengths:", len(dataset_0), len(dataset_1), len(dataset_2), len(dataset_3))
109
 
110
  for i in range(n_examples):
111
  example = train_data[i]
@@ -113,30 +90,6 @@ def training():
113
  # print(example["text"])
114
  train_examples.append(InputExample(texts=[example['text']], label=example['label']))
115
 
116
- # for i in range(n_0):
117
- # example = dataset_0[i]
118
- # # example_opposite = dataset_0[-(i)]
119
- # # print(example["text"])
120
- # train_examples.append(InputExample(texts=[example['text']], label=0))
121
-
122
- # for i in range(n_1):
123
- # example = dataset_1[i]
124
- # # example_opposite = dataset_1[-(i)]
125
- # # print(example["text"])
126
- # train_examples.append(InputExample(texts=[example['text']], label=1))
127
-
128
- # for i in range(n_2):
129
- # example = dataset_2[i]
130
- # # example_opposite = dataset_2[-(i)]
131
- # # print(example["text"])
132
- # train_examples.append(InputExample(texts=[example['text']], label=2))
133
-
134
- # for i in range(n_3):
135
- # example = dataset_3[i]
136
- # # example_opposite = dataset_3[-(i)]
137
- # # print(example["text"])
138
- # train_examples.append(InputExample(texts=[example['text']], label=3))
139
-
140
  train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=25)
141
 
142
  print("END DATALOADER")
 
83
  train_data = dataset["train"]
84
  # For agility we only use 1/2 of our available data
85
  n_examples = dataset["train"].num_rows // 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  for i in range(n_examples):
88
  example = train_data[i]
 
90
  # print(example["text"])
91
  train_examples.append(InputExample(texts=[example['text']], label=example['label']))
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=25)
94
 
95
  print("END DATALOADER")
train.py CHANGED
@@ -92,59 +92,13 @@ def training():
92
  train_data = dataset["train"]
93
  # For agility we only use 1/2 of our available data
94
  n_examples = dataset["train"].num_rows // 2
95
- # n_remaining = dataset["train"].num_rows - n_examples
96
- # dataset_clean = {}
97
- # # dataset_0 = []
98
- # # dataset_1 = []
99
- # # dataset_2 = []
100
- # # dataset_3 = []
101
- # for i in range(n_examples):
102
- # dataset_clean[i] = {}
103
- # dataset_clean[i]["text"] = normalize(train_data[i]["text"], lowercase=True, remove_stopwords=True)
104
- # dataset_clean[i]["label"] = train_data[i]["label"]
105
- # if train_data[i]["label"] == 0:
106
- # dataset_0.append(dataset_clean[i])
107
- # elif train_data[i]["label"] == 1:
108
- # dataset_1.append(dataset_clean[i])
109
- # elif train_data[i]["label"] == 2:
110
- # dataset_2.append(dataset_clean[i])
111
- # elif train_data[i]["label"] == 3:
112
- # dataset_3.append(dataset_clean[i])
113
- # n_0 = len(dataset_0) // 2
114
- # n_1 = len(dataset_1) // 2
115
- # n_2 = len(dataset_2) // 2
116
- # n_3 = len(dataset_3) // 2
117
- # print("Label lengths:", len(dataset_0), len(dataset_1), len(dataset_2), len(dataset_3))
118
 
119
  for i in range(n_examples):
120
  example = train_data[i]
121
  # example_opposite = dataset_clean[-(i)]
122
  # print(example["text"])
123
  train_examples.append(InputExample(texts=[example['text']], label=example['label']))
124
-
125
- # for i in range(n_0):
126
- # example = dataset_0[i]
127
- # # example_opposite = dataset_0[-(i)]
128
- # # print(example["text"])
129
- # train_examples.append(InputExample(texts=[example['text']], label=0))
130
-
131
- # for i in range(n_1):
132
- # example = dataset_1[i]
133
- # # example_opposite = dataset_1[-(i)]
134
- # # print(example["text"])
135
- # train_examples.append(InputExample(texts=[example['text']], label=1))
136
-
137
- # for i in range(n_2):
138
- # example = dataset_2[i]
139
- # # example_opposite = dataset_2[-(i)]
140
- # # print(example["text"])
141
- # train_examples.append(InputExample(texts=[example['text']], label=2))
142
-
143
- # for i in range(n_3):
144
- # example = dataset_3[i]
145
- # # example_opposite = dataset_3[-(i)]
146
- # # print(example["text"])
147
- # train_examples.append(InputExample(texts=[example['text']], label=3))
148
 
149
  train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=25)
150
 
 
92
  train_data = dataset["train"]
93
  # For agility we only use 1/2 of our available data
94
  n_examples = dataset["train"].num_rows // 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  for i in range(n_examples):
97
  example = train_data[i]
98
  # example_opposite = dataset_clean[-(i)]
99
  # print(example["text"])
100
  train_examples.append(InputExample(texts=[example['text']], label=example['label']))
101
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=25)
104